1 package Perl::Tidy::VerticalAligner;
4 our $VERSION = '20210717';
6 use Perl::Tidy::VerticalAligner::Alignment;
7 use Perl::Tidy::VerticalAligner::Line;
9 # The Perl::Tidy::VerticalAligner package collects output lines and
10 # attempts to line up certain common tokens, such as => and #, which are
11 # identified by the calling routine.
14 # - Initiate an object with a call to new().
15 # - Write lines one-by-one with calls to valign_input().
16 # - Make a final call to flush() to empty the pipeline.
18 # The sub valign_input collects lines into groups. When a group reaches
19 # the maximum possible size it is processed for alignment and output.
20 # The maximum group size is reached whenerver there is a change in indentation
21 # level, a blank line, a block comment, or an external flush call. The calling
22 # routine may also force a break in alignment at any time.
24 # If the calling routine needs to interrupt the output and send other text to
25 # the output, it must first call flush() to empty the output pipeline. This
26 # might occur for example if a block of pod text needs to be sent to the output
27 # between blocks of code.
29 # It is essential that a final call to flush() be made. Otherwise some
30 # final lines of text will be lost.
33 # CODE SECTION 1: Preliminary code, global definitions and sub new
35 # CODE SECTION 2: Some Basic Utilities
36 # CODE SECTION 3: Code to accept input and form groups
38 # CODE SECTION 4: Code to process comment lines
39 # sub _flush_comment_lines
40 # CODE SECTION 5: Code to process groups of code lines
41 # sub _flush_group_lines
42 # CODE SECTION 6: Output Step A
43 # sub valign_output_step_A
44 # CODE SECTION 7: Output Step B
45 # sub valign_output_step_B
46 # CODE SECTION 8: Output Step C
47 # sub valign_output_step_C
48 # CODE SECTION 9: Output Step D
49 # sub valign_output_step_D
50 # CODE SECTION 10: Summary
51 # sub report_anything_unusual
53 ##################################################################
54 # CODE SECTION 1: Preliminary code, global definitions and sub new
55 ##################################################################
59 # Catch any undefined sub calls so that we are sure to get
60 # some diagnostic information. This sub should never be called
61 # except for a programming error.
63 return if ( $AUTOLOAD =~ /\bDESTROY$/ );
64 my ( $pkg, $fname, $lno ) = caller();
65 my $my_package = __PACKAGE__;
67 ======================================================================
68 Error detected in package '$my_package', version $VERSION
69 Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
70 Called from package: '$pkg'
71 Called from File '$fname' at line '$lno'
72 This error is probably due to a recent programming change
73 ======================================================================
80 # required to avoid call to AUTOLOAD in some versions of perl
85 # Define the fixed indexes for variables in $self, which is an array
86 # reference. Note the convention of leading and trailing underscores to
90 _file_writer_object_ => $i++,
91 _logger_object_ => $i++,
92 _diagnostics_object_ => $i++,
93 _length_function_ => $i++,
96 _rOpts_indent_columns_ => $i++,
98 _rOpts_entab_leading_whitespace_ => $i++,
99 _rOpts_fixed_position_side_comment_ => $i++,
100 _rOpts_minimum_space_to_comment_ => $i++,
101 _rOpts_maximum_line_length_ => $i++,
102 _rOpts_variable_maximum_line_length_ => $i++,
103 _rOpts_valign_ => $i++,
105 _last_level_written_ => $i++,
106 _last_side_comment_column_ => $i++,
107 _last_side_comment_line_number_ => $i++,
108 _last_side_comment_length_ => $i++,
109 _last_side_comment_level_ => $i++,
110 _outdented_line_count_ => $i++,
111 _first_outdented_line_at_ => $i++,
112 _last_outdented_line_at_ => $i++,
113 _consecutive_block_comments_ => $i++,
115 _rgroup_lines_ => $i++,
116 _group_level_ => $i++,
117 _group_type_ => $i++,
118 _zero_count_ => $i++,
119 _last_leading_space_count_ => $i++,
120 _comment_leading_space_count_ => $i++,
123 # Debug flag. This is a relic from the original program development
124 # looking for problems with tab characters. Caution: this debug flag can
125 # produce a lot of output It should be 0 except when debugging small
128 use constant DEBUG_TABS => 0;
130 my $debug_warning = sub {
131 print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
135 DEBUG_TABS && $debug_warning->('TABS');
141 my ( $class, @args ) = @_;
145 file_writer_object => undef,
146 logger_object => undef,
147 diagnostics_object => undef,
148 length_function => sub { return length( $_[0] ) },
150 my %args = ( %defaults, @args );
152 # Initialize other caches and buffers
153 initialize_step_B_cache();
154 initialize_valign_buffer();
155 initialize_leading_string_cache();
158 # Initialize all variables in $self.
159 # To add an item to $self, first define a new constant index in the BEGIN
164 $self->[_file_writer_object_] = $args{file_writer_object};
165 $self->[_logger_object_] = $args{logger_object};
166 $self->[_diagnostics_object_] = $args{diagnostics_object};
167 $self->[_length_function_] = $args{length_function};
169 # shortcuts to user options
170 my $rOpts = $args{rOpts};
172 $self->[_rOpts_] = $rOpts;
173 $self->[_rOpts_indent_columns_] = $rOpts->{'indent-columns'};
174 $self->[_rOpts_tabs_] = $rOpts->{'tabs'};
175 $self->[_rOpts_entab_leading_whitespace_] =
176 $rOpts->{'entab-leading-whitespace'};
177 $self->[_rOpts_fixed_position_side_comment_] =
178 $rOpts->{'fixed-position-side-comment'};
179 $self->[_rOpts_minimum_space_to_comment_] =
180 $rOpts->{'minimum-space-to-comment'};
181 $self->[_rOpts_maximum_line_length_] = $rOpts->{'maximum-line-length'};
182 $self->[_rOpts_variable_maximum_line_length_] =
183 $rOpts->{'variable-maximum-line-length'};
184 $self->[_rOpts_valign_] = $rOpts->{'valign'};
186 # Batch of lines being collected
187 $self->[_rgroup_lines_] = [];
188 $self->[_group_level_] = 0;
189 $self->[_group_type_] = "";
190 $self->[_zero_count_] = 0;
191 $self->[_comment_leading_space_count_] = 0;
192 $self->[_last_leading_space_count_] = 0;
194 # Memory of what has been processed
195 $self->[_last_level_written_] = -1;
196 $self->[_last_side_comment_column_] = 0;
197 $self->[_last_side_comment_line_number_] = 0;
198 $self->[_last_side_comment_length_] = 0;
199 $self->[_last_side_comment_level_] = -1;
200 $self->[_outdented_line_count_] = 0;
201 $self->[_first_outdented_line_at_] = 0;
202 $self->[_last_outdented_line_at_] = 0;
203 $self->[_consecutive_block_comments_] = 0;
209 #################################
210 # CODE SECTION 2: Basic Utilities
211 #################################
215 # flush() is the external call to completely empty the pipeline.
218 # push things out the pipline...
220 # push out any current group lines
221 $self->_flush_group_lines();
223 # then anything left in the cache of step_B
224 $self->_flush_cache();
226 # then anything left in the buffer of step_C
227 $self->dump_valign_buffer();
232 sub initialize_for_new_group {
235 $self->[_rgroup_lines_] = [];
236 $self->[_group_type_] = "";
237 $self->[_zero_count_] = 0;
238 $self->[_comment_leading_space_count_] = 0;
239 $self->[_last_leading_space_count_] = 0;
241 # Note that the value for _group_level_ is
242 # handled separately in sub valign_input
246 sub group_line_count {
247 return +@{ $_[0]->[_rgroup_lines_] };
250 # interface to Perl::Tidy::Diagnostics routines
251 # For debugging; not currently used
252 sub write_diagnostics {
253 my ( $self, $msg ) = @_;
254 my $diagnostics_object = $self->[_diagnostics_object_];
255 if ($diagnostics_object) {
256 $diagnostics_object->write_diagnostics($msg);
261 # interface to Perl::Tidy::Logger routines
263 my ( $self, $msg ) = @_;
264 my $logger_object = $self->[_logger_object_];
265 if ($logger_object) {
266 $logger_object->warning($msg);
271 sub write_logfile_entry {
272 my ( $self, $msg ) = @_;
273 my $logger_object = $self->[_logger_object_];
274 if ($logger_object) {
275 $logger_object->write_logfile_entry($msg);
280 sub report_definite_bug {
281 my ( $self, $msg ) = @_;
282 my $logger_object = $self->[_logger_object_];
283 if ($logger_object) {
284 $logger_object->report_definite_bug();
289 sub get_cached_line_count {
291 return $self->group_line_count() + ( get_cached_line_type() ? 1 : 0 );
296 # return the number of leading spaces associated with an indentation
297 # variable $indentation is either a constant number of spaces or an
298 # object with a get_spaces method.
299 my $indentation = shift;
300 return ref($indentation) ? $indentation->get_spaces() : $indentation;
303 sub get_recoverable_spaces {
305 # return the number of spaces (+ means shift right, - means shift left)
306 # that we would like to shift a group of lines with the same indentation
307 # to get them to line up with their opening parens
308 my $indentation = shift;
309 return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
312 sub maximum_line_length_for_level {
314 # return maximum line length for line starting with a given level
315 my ( $self, $level ) = @_;
316 my $maximum_line_length = $self->[_rOpts_maximum_line_length_];
317 if ( $self->[_rOpts_variable_maximum_line_length_] ) {
318 if ( $level < 0 ) { $level = 0 }
319 $maximum_line_length += $level * $self->[_rOpts_indent_columns_];
321 return $maximum_line_length;
324 ######################################################
325 # CODE SECTION 3: Code to accept input and form groups
326 ######################################################
328 sub push_group_line {
330 my ( $self, $new_line ) = @_;
331 my $rgroup_lines = $self->[_rgroup_lines_];
332 push @{$rgroup_lines}, $new_line;
336 use constant DEBUG_VALIGN => 0;
337 use constant SC_LONG_LINE_DIFF => 12;
341 # Place one line in the current vertical group.
343 # The input parameters are:
344 # $level = indentation level of this line
345 # $rfields = reference to array of fields
346 # $rpatterns = reference to array of patterns, one per field
347 # $rtokens = reference to array of tokens starting fields 1,2,..
349 # Here is an example of what this package does. In this example,
350 # we are trying to line up both the '=>' and the '#'.
352 # '18' => 'grave', # \`
353 # '19' => 'acute', # `'
354 # '20' => 'caron', # \v
355 # <-tabs-><f1-><--field 2 ---><-f3->
358 # col1 col2 col3 col4
360 # The calling routine has already broken the entire line into 3 fields as
361 # indicated. (So the work of identifying promising common tokens has
362 # already been done).
364 # In this example, there will be 2 tokens being matched: '=>' and '#'.
365 # They are the leading parts of fields 2 and 3, but we do need to know
366 # what they are so that we can dump a group of lines when these tokens
369 # The fields contain the actual characters of each field. The patterns
370 # are like the fields, but they contain mainly token types instead
371 # of tokens, so they have fewer characters. They are used to be
372 # sure we are matching fields of similar type.
374 # In this example, there will be 4 column indexes being adjusted. The
375 # first one is always at zero. The interior columns are at the start of
376 # the matching tokens, and the last one tracks the maximum line length.
378 # Each time a new line comes in, it joins the current vertical
379 # group if possible. Otherwise it causes the current group to be flushed
380 # and a new group is started.
382 # For each new group member, the column locations are increased, as
383 # necessary, to make room for the new fields. When the group is finally
384 # output, these column numbers are used to compute the amount of spaces of
385 # padding needed for each field.
387 # Programming note: the fields are assumed not to have any tab characters.
388 # Tabs have been previously removed except for tabs in quoted strings and
389 # side comments. Tabs in these fields can mess up the column counting.
390 # The log file warns the user if there are any such tabs.
392 my ( $self, $rline_hash ) = @_;
394 my $level = $rline_hash->{level};
395 my $level_end = $rline_hash->{level_end};
396 my $level_adj = $rline_hash->{level_adj};
397 my $indentation = $rline_hash->{indentation};
398 my $list_seqno = $rline_hash->{list_seqno};
399 my $outdent_long_lines = $rline_hash->{outdent_long_lines};
400 my $is_terminal_ternary = $rline_hash->{is_terminal_ternary};
401 my $rvertical_tightness_flags = $rline_hash->{rvertical_tightness_flags};
402 my $level_jump = $rline_hash->{level_jump};
403 my $rfields = $rline_hash->{rfields};
404 my $rtokens = $rline_hash->{rtokens};
405 my $rpatterns = $rline_hash->{rpatterns};
406 my $rfield_lengths = $rline_hash->{rfield_lengths};
407 my $terminal_block_type = $rline_hash->{terminal_block_type};
408 my $batch_count = $rline_hash->{batch_count};
409 my $break_alignment_before = $rline_hash->{break_alignment_before};
410 my $break_alignment_after = $rline_hash->{break_alignment_after};
411 my $Kend = $rline_hash->{Kend};
412 my $ci_level = $rline_hash->{ci_level};
414 # The index '$Kend' is a value which passed along with the line text to sub
415 # 'write_code_line' for a convergence check.
417 # number of fields is $jmax
418 # number of tokens between fields is $jmax-1
419 my $jmax = @{$rfields} - 1;
421 my $leading_space_count = get_spaces($indentation);
423 # set outdented flag to be sure we either align within statements or
424 # across statement boundaries, but not both.
426 $self->[_last_leading_space_count_] > $leading_space_count;
427 $self->[_last_leading_space_count_] = $leading_space_count;
429 # Identify a hanging side comment. Hanging side comments have an empty
431 my $is_hanging_side_comment =
432 ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
434 # Undo outdented flag for a hanging side comment
435 $is_outdented = 0 if $is_hanging_side_comment;
437 # Identify a block comment.
438 my $is_block_comment = $jmax == 0 && substr( $rfields->[0], 0, 1 ) eq '#';
440 # Block comment .. update count
441 if ($is_block_comment) {
442 $self->[_consecutive_block_comments_]++;
445 # Not a block comment ..
446 # Forget side comment column if we saw 2 or more block comments,
447 # and reset the count
450 if ( $self->[_consecutive_block_comments_] > 1 ) {
451 $self->forget_side_comment();
453 $self->[_consecutive_block_comments_] = 0;
456 # Reset side comment location if we are entering a new block from level 0.
457 # This is intended to keep them from drifting too far to the right.
458 if ( $terminal_block_type && $level_adj == 0 && $level_end > $level ) {
459 $self->forget_side_comment();
462 my $group_level = $self->[_group_level_];
465 my $nlines = $self->group_line_count();
467 "Entering valign_input: lines=$nlines new #fields= $jmax, leading_count=$leading_space_count, level_jump=$level_jump, level=$level, group_level=$group_level, level_jump=$level_jump\n";
470 # Validate cached line if necessary: If we can produce a container
471 # with just 2 lines total by combining an existing cached opening
472 # token with the closing token to follow, then we will mark both
473 # cached flags as valid.
474 my $cached_line_type = get_cached_line_type();
475 if ($cached_line_type) {
476 my $cached_line_flag = get_cached_line_flag();
477 if ($rvertical_tightness_flags) {
478 my $cached_seqno = get_cached_seqno();
480 && $self->group_line_count() <= 1
481 && $rvertical_tightness_flags->[2]
482 && $rvertical_tightness_flags->[2] == $cached_seqno )
484 $rvertical_tightness_flags->[3] ||= 1;
485 set_cached_line_valid(1);
489 # do not join an opening block brace with an unbalanced line
490 # unless requested with a flag value of 2
491 if ( $cached_line_type == 3
492 && !$self->group_line_count()
493 && $cached_line_flag < 2
494 && $level_jump != 0 )
496 set_cached_line_valid(0);
501 if ( $level < 0 ) { $level = 0 }
503 # do not align code across indentation level changes
504 # or if vertical alignment is turned off for debugging
505 if ( $level != $group_level || $is_outdented || !$self->[_rOpts_valign_] ) {
507 $self->_flush_group_lines( $level - $group_level );
509 $group_level = $level;
510 $self->[_group_level_] = $group_level;
512 # wait until after the above flush to get the leading space
513 # count because it may have been changed if the -icp flag is in
515 $leading_space_count = get_spaces($indentation);
519 # --------------------------------------------------------------------
520 # Collect outdentable block COMMENTS
521 # --------------------------------------------------------------------
522 my $is_blank_line = "";
523 if ( $self->[_group_type_] eq 'COMMENT' ) {
527 && $outdent_long_lines
528 && $leading_space_count ==
529 $self->[_comment_leading_space_count_]
535 # Note that for a comment group we are not storing a line
536 # but rather just the text and its length.
537 $self->push_group_line(
538 [ $rfields->[0], $rfield_lengths->[0], $Kend ] );
542 $self->_flush_group_lines();
546 my $rgroup_lines = $self->[_rgroup_lines_];
547 if ( $break_alignment_before && @{$rgroup_lines} ) {
548 $rgroup_lines->[-1]->set_end_group(1);
551 # --------------------------------------------------------------------
552 # add dummy fields for terminal ternary
553 # --------------------------------------------------------------------
554 my $j_terminal_match;
556 if ( $is_terminal_ternary && @{$rgroup_lines} ) {
558 fix_terminal_ternary( $rgroup_lines->[-1], $rfields, $rtokens,
559 $rpatterns, $rfield_lengths, $group_level, );
560 $jmax = @{$rfields} - 1;
563 # --------------------------------------------------------------------
564 # add dummy fields for else statement
565 # --------------------------------------------------------------------
567 # Note the trailing space after 'else' here. If there were no space between
568 # the else and the next '{' then we would not be able to do vertical
569 # alignment of the '{'.
570 if ( $rfields->[0] eq 'else '
572 && $level_jump == 0 )
576 fix_terminal_else( $rgroup_lines->[-1], $rfields, $rtokens,
577 $rpatterns, $rfield_lengths );
578 $jmax = @{$rfields} - 1;
581 # --------------------------------------------------------------------
582 # Handle simple line of code with no fields to match.
583 # --------------------------------------------------------------------
585 $self->[_zero_count_]++;
587 if ( @{$rgroup_lines}
588 && !get_recoverable_spaces( $rgroup_lines->[0]->get_indentation() )
592 # flush the current group if it has some aligned columns..
593 # or we haven't seen a comment lately
594 if ( $rgroup_lines->[0]->get_jmax() > 1
595 || $self->[_zero_count_] > 3 )
597 $self->_flush_group_lines();
601 # start new COMMENT group if this comment may be outdented
602 if ( $is_block_comment
603 && $outdent_long_lines
604 && !$self->group_line_count() )
606 $self->[_group_type_] = 'COMMENT';
607 $self->[_comment_leading_space_count_] = $leading_space_count;
608 $self->push_group_line(
609 [ $rfields->[0], $rfield_lengths->[0], $Kend ] );
613 # just write this line directly if no current group, no side comment,
614 # and no space recovery is needed.
615 if ( !$self->group_line_count()
616 && !get_recoverable_spaces($indentation) )
619 $self->valign_output_step_B(
621 leading_space_count => $leading_space_count,
622 line => $rfields->[0],
623 line_length => $rfield_lengths->[0],
624 side_comment_length => 0,
625 outdent_long_lines => $outdent_long_lines,
626 rvertical_tightness_flags => $rvertical_tightness_flags,
628 level_end => $level_end,
637 $self->[_zero_count_] = 0;
640 my $maximum_line_length_for_level =
641 $self->maximum_line_length_for_level($level);
643 # --------------------------------------------------------------------
644 # It simplifies things to create a zero length side comment
646 # --------------------------------------------------------------------
647 $self->make_side_comment( $rtokens, $rfields, $rpatterns, $rfield_lengths );
648 $jmax = @{$rfields} - 1;
650 # --------------------------------------------------------------------
651 # create an object to hold this line
652 # --------------------------------------------------------------------
653 my $new_line = Perl::Tidy::VerticalAligner::Line->new(
658 rpatterns => $rpatterns,
659 rfield_lengths => $rfield_lengths,
660 indentation => $indentation,
661 leading_space_count => $leading_space_count,
662 outdent_long_lines => $outdent_long_lines,
663 list_seqno => $list_seqno,
665 is_hanging_side_comment => $is_hanging_side_comment,
666 maximum_line_length => $maximum_line_length_for_level,
667 rvertical_tightness_flags => $rvertical_tightness_flags,
668 is_terminal_ternary => $is_terminal_ternary,
669 j_terminal_match => $j_terminal_match,
670 end_group => $break_alignment_after,
672 ci_level => $ci_level,
674 level_end => $level_end,
679 # --------------------------------------------------------------------
680 # Decide if this is a simple list of items.
681 # We use this to be less restrictive in deciding what to align.
682 # --------------------------------------------------------------------
683 decide_if_list($new_line) if ($list_seqno);
685 # --------------------------------------------------------------------
686 # Append this line to the current group (or start new group)
687 # --------------------------------------------------------------------
689 $self->push_group_line($new_line);
691 # output this group if it ends in a terminal else or ternary line
692 if ( defined($j_terminal_match) ) {
693 $self->_flush_group_lines();
696 # Force break after jump to lower level
697 if ( $level_jump < 0 ) {
698 $self->_flush_group_lines($level_jump);
701 # --------------------------------------------------------------------
702 # Some old debugging stuff
703 # --------------------------------------------------------------------
705 print STDOUT "exiting valign_input fields:";
706 dump_array( @{$rfields} );
707 print STDOUT "exiting valign_input tokens:";
708 dump_array( @{$rtokens} );
709 print STDOUT "exiting valign_input patterns:";
710 dump_array( @{$rpatterns} );
716 sub join_hanging_comment {
718 # Add dummy fields to a hanging side comment to make it look
719 # like the first line in its potential group. This simplifies
721 my ( $new_line, $old_line ) = @_;
723 my $jmax = $new_line->get_jmax();
726 return 0 unless $jmax == 1;
727 my $rtokens = $new_line->get_rtokens();
729 # the second field must be a comment
730 return 0 unless $rtokens->[0] eq '#';
731 my $rfields = $new_line->get_rfields();
733 # the first field must be empty
734 return 0 unless $rfields->[0] =~ /^\s*$/;
736 # the current line must have fewer fields
737 my $maximum_field_index = $old_line->get_jmax();
739 unless $maximum_field_index > $jmax;
742 my $rpatterns = $new_line->get_rpatterns();
743 my $rfield_lengths = $new_line->get_rfield_lengths();
745 $new_line->set_is_hanging_side_comment(1);
746 $jmax = $maximum_field_index;
747 $new_line->set_jmax($jmax);
748 $rfields->[$jmax] = $rfields->[1];
749 $rfield_lengths->[$jmax] = $rfield_lengths->[1];
750 $rtokens->[ $jmax - 1 ] = $rtokens->[0];
751 $rpatterns->[ $jmax - 1 ] = $rpatterns->[0];
752 foreach my $j ( 1 .. $jmax - 1 ) {
754 $rfield_lengths->[$j] = 0;
755 $rtokens->[ $j - 1 ] = "";
756 $rpatterns->[ $j - 1 ] = "";
761 sub make_side_comment {
763 # create an empty side comment if none exists
765 my ( $self, $rtokens, $rfields, $rpatterns, $rfield_lengths ) = @_;
767 my $jmax = @{$rfields} - 1;
769 # if line does not have a side comment...
770 if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
772 $rtokens->[ $jmax - 1 ] = '#';
773 $rfields->[$jmax] = '';
774 $rfield_lengths->[$jmax] = 0;
775 $rpatterns->[$jmax] = '#';
780 { ## closure for sub decide_if_list
788 @is_comma_token{@q} = (1) x scalar(@q);
795 # A list will be taken to be a line with a forced break in which all
796 # of the field separators are commas or comma-arrows (except for the
799 my $rtokens = $line->get_rtokens();
800 my $test_token = $rtokens->[0];
801 my ( $raw_tok, $lev, $tag, $tok_count ) =
802 decode_alignment_token($test_token);
803 if ( $is_comma_token{$raw_tok} ) {
804 my $list_type = $test_token;
805 my $jmax = $line->get_jmax();
807 foreach ( 1 .. $jmax - 2 ) {
808 ( $raw_tok, $lev, $tag, $tok_count ) =
809 decode_alignment_token( $rtokens->[$_] );
810 if ( !$is_comma_token{$raw_tok} ) {
815 $line->set_list_type($list_type);
821 sub fix_terminal_ternary {
823 # Add empty fields as necessary to align a ternary term
832 # returns the index of the terminal question token, if any
834 my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths,
838 return unless ($old_line);
839 use constant EXPLAIN_TERNARY => 0;
841 my $jmax = @{$rfields} - 1;
842 my $rfields_old = $old_line->get_rfields();
844 my $rpatterns_old = $old_line->get_rpatterns();
845 my $rtokens_old = $old_line->get_rtokens();
846 my $maximum_field_index = $old_line->get_jmax();
848 # look for the question mark after the :
853 foreach my $j ( 0 .. $maximum_field_index - 1 ) {
854 my $tok = $rtokens_old->[$j];
855 my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
856 if ( $raw_tok eq '?' ) {
857 $depth_question = $lev;
859 # depth must be correct
860 next unless ( $depth_question eq $group_level );
863 if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
864 $pad_length = length($1);
865 $pad = " " x $pad_length;
868 return; # shouldn't happen
873 return unless ( defined($jquestion) ); # shouldn't happen
875 # Now splice the tokens and patterns of the previous line
876 # into the else line to insure a match. Add empty fields
878 my $jadd = $jquestion;
880 # Work on copies of the actual arrays in case we have
881 # to return due to an error
882 my @fields = @{$rfields};
883 my @patterns = @{$rpatterns};
884 my @tokens = @{$rtokens};
885 my @field_lengths = @{$rfield_lengths};
887 EXPLAIN_TERNARY && do {
889 print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
890 print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
891 print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
892 print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
893 print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
894 print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
897 # handle cases of leading colon on this line
898 if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
900 my ( $colon, $therest ) = ( $1, $2 );
902 # Handle sub-case of first field with leading colon plus additional code
903 # This is the usual situation as at the '1' below:
909 # Split the first field after the leading colon and insert padding.
910 # Note that this padding will remain even if the terminal value goes
911 # out on a separate line. This does not seem to look to bad, so no
912 # mechanism has been included to undo it.
913 my $field1 = shift @fields;
914 my $field_length1 = shift @field_lengths;
915 my $len_colon = length($colon);
916 unshift @fields, ( $colon, $pad . $therest );
917 unshift @field_lengths,
918 ( $len_colon, $pad_length + $field_length1 - $len_colon );
920 # change the leading pattern from : to ?
921 return unless ( $patterns[0] =~ s/^\:/?/ );
923 # install leading tokens and patterns of existing line
924 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
925 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
927 # insert appropriate number of empty fields
928 splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
929 splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd;
932 # handle sub-case of first field just equal to leading colon.
933 # This can happen for example in the example below where
934 # the leading '(' would create a new alignment token
935 # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
936 # : ( $mname = $name . '->' );
939 return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
941 # prepend a leading ? onto the second pattern
942 $patterns[1] = "?b" . $patterns[1];
944 # pad the second field
945 $fields[1] = $pad . $fields[1];
946 $field_lengths[1] = $pad_length + $field_lengths[1];
948 # install leading tokens and patterns of existing line, replacing
949 # leading token and inserting appropriate number of empty fields
950 splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
951 splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
952 splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
953 splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd;
957 # Handle case of no leading colon on this line. This will
958 # be the case when -wba=':' is used. For example,
963 # install leading tokens and patterns of existing line
964 $patterns[0] = '?' . 'b' . $patterns[0];
965 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
966 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
968 # insert appropriate number of empty fields
969 $jadd = $jquestion + 1;
970 $fields[0] = $pad . $fields[0];
971 $field_lengths[0] = $pad_length + $field_lengths[0];
972 splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
973 splice( @field_lengths, 0, 0, (0) x $jadd ) if $jadd;
976 EXPLAIN_TERNARY && do {
978 print STDOUT "MODIFIED TOKENS=<@tokens>\n";
979 print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
980 print STDOUT "MODIFIED FIELDS=<@fields>\n";
983 # all ok .. update the arrays
984 @{$rfields} = @fields;
985 @{$rtokens} = @tokens;
986 @{$rpatterns} = @patterns;
987 @{$rfield_lengths} = @field_lengths;
989 # force a flush after this line
993 sub fix_terminal_else {
995 # Add empty fields as necessary to align a balanced terminal
996 # else block to a previous if/elsif/unless block,
999 # if ( 1 || $x ) { print "ok 13\n"; }
1000 # else { print "not ok 13\n"; }
1002 # returns a positive value if the else block should be indented
1004 my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths ) = @_;
1006 return unless ($old_line);
1007 my $jmax = @{$rfields} - 1;
1008 return unless ( $jmax > 0 );
1010 # check for balanced else block following if/elsif/unless
1011 my $rfields_old = $old_line->get_rfields();
1013 # TBD: add handling for 'case'
1014 return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
1016 # look for the opening brace after the else, and extract the depth
1017 my $tok_brace = $rtokens->[0];
1019 if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
1021 # probably: "else # side_comment"
1024 my $rpatterns_old = $old_line->get_rpatterns();
1025 my $rtokens_old = $old_line->get_rtokens();
1026 my $maximum_field_index = $old_line->get_jmax();
1028 # be sure the previous if/elsif is followed by an opening paren
1030 my $tok_paren = '(' . $depth_brace;
1031 my $tok_test = $rtokens_old->[$jparen];
1032 return unless ( $tok_test eq $tok_paren ); # shouldn't happen
1034 # Now find the opening block brace
1036 foreach my $j ( 1 .. $maximum_field_index - 1 ) {
1037 my $tok = $rtokens_old->[$j];
1038 if ( $tok eq $tok_brace ) {
1043 return unless ( defined($jbrace) ); # shouldn't happen
1045 # Now splice the tokens and patterns of the previous line
1046 # into the else line to insure a match. Add empty fields
1048 my $jadd = $jbrace - $jparen;
1049 splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
1050 splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
1051 splice( @{$rfields}, 1, 0, ('') x $jadd );
1052 splice( @{$rfield_lengths}, 1, 0, (0) x $jadd );
1054 # force a flush after this line if it does not follow a case
1055 if ( $rfields_old->[0] =~ /^case\s*$/ ) { return }
1056 else { return $jbrace }
1059 my %is_closing_block_type;
1063 @is_closing_block_type{@_} = (1) x scalar(@_);
1068 # See if the current line matches the current vertical alignment group.
1070 my ( $self, $new_line, $base_line, $prev_line ) = @_;
1073 # $new_line = the line being considered for group inclusion
1074 # $base_line = the first line of the current group
1075 # $prev_line = the line just before $new_line
1077 # returns a flag and a value as follows:
1078 # return (0, $imax_align) if the line does not match
1079 # return (1, $imax_align) if the line matches but does not fit
1080 # return (2, $imax_align) if the line matches and fits
1082 # Returns '$imax_align' which is the index of the maximum matching token.
1083 # It will be used in the subsequent left-to-right sweep to align as many
1084 # tokens as possible for lines which partially match.
1085 my $imax_align = -1;
1087 # variable $GoToMsg explains reason for no match, for debugging
1089 use constant EXPLAIN_CHECK_MATCH => 0;
1091 # This is a flag for testing alignment by sub sweep_left_to_right only.
1092 # This test can help find problems with the alignment logic.
1093 # This flag should normally be zero.
1094 use constant TEST_SWEEP_ONLY => 0;
1096 my $jmax = $new_line->get_jmax();
1097 my $maximum_field_index = $base_line->get_jmax();
1099 my $jlimit = $jmax - 2;
1100 if ( $jmax > $maximum_field_index ) {
1101 $jlimit = $maximum_field_index - 2;
1104 if ( $new_line->get_is_hanging_side_comment() ) {
1106 # HSC's can join the group if they fit
1112 # A group with hanging side comments ends with the first non hanging
1114 if ( $base_line->get_is_hanging_side_comment() ) {
1115 $GoToMsg = "end of hanging side comments";
1119 # The number of tokens that this line shares with the previous line
1120 # has been stored with the previous line. This value was calculated
1121 # and stored by sub 'match_line_pair'.
1122 $imax_align = $prev_line->get_imax_pair();
1124 if ( $imax_align != $jlimit ) {
1125 $GoToMsg = "Not all tokens match: $imax_align != $jlimit\n";
1131 # The tokens match, but the lines must have identical number of
1132 # tokens to join the group.
1133 if ( $maximum_field_index != $jmax ) {
1134 $GoToMsg = "token count differs";
1138 # The tokens match. Now See if there is space for this line in the
1140 if ( $self->check_fit( $new_line, $base_line ) && !TEST_SWEEP_ONLY ) {
1143 && print "match and fit, imax_align=$imax_align, jmax=$jmax\n";
1144 return ( 2, $jlimit );
1149 && print "match but no fit, imax_align=$imax_align, jmax=$jmax\n";
1150 return ( 1, $jlimit );
1157 "no match because $GoToMsg, max match index =i $imax_align, jmax=$jmax\n";
1159 return ( 0, $imax_align );
1164 my ( $self, $new_line, $old_line ) = @_;
1166 # The new line has alignments identical to the current group. Now we have
1167 # to fit the new line into the group without causing a field to exceed the
1168 # line length limit.
1169 # return true if successful
1170 # return false if not successful
1172 my $jmax = $new_line->get_jmax();
1173 my $leading_space_count = $new_line->get_leading_space_count();
1174 my $rfield_lengths = $new_line->get_rfield_lengths();
1175 my $padding_available = $old_line->get_available_space_on_right();
1176 my $jmax_old = $old_line->get_jmax();
1178 # Safety check ... only lines with equal array sizes should arrive here
1179 # from sub check_match. So if this error occurs, look at recent changes in
1180 # sub check_match. It is only supposed to check the fit of lines with
1181 # identical numbers of alignment tokens.
1182 if ( $jmax_old ne $jmax ) {
1184 $self->warning(<<EOM);
1185 Program bug detected in Perl::Tidy::VerticalAligner sub check_fit
1186 unexpected difference in array lengths: $jmax != $jmax_old
1191 # Save current columns in case this line does not fit.
1192 my @alignments = $old_line->get_alignments();
1193 foreach my $alignment (@alignments) {
1194 $alignment->save_column();
1197 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
1199 # Loop over all alignments ...
1200 my $maximum_field_index = $old_line->get_jmax();
1201 for my $j ( 0 .. $jmax ) {
1203 my $pad = $rfield_lengths->[$j] - $old_line->current_field_width($j);
1206 $pad += $leading_space_count;
1209 # Keep going if this field does not need any space.
1212 # See if it needs too much space.
1213 if ( $pad > $padding_available ) {
1215 ################################################
1216 # Line does not fit -- revert to starting state
1217 ################################################
1218 foreach my $alignment (@alignments) {
1219 $alignment->restore_column();
1224 # make room for this field
1225 $old_line->increase_field_width( $j, $pad );
1226 $padding_available -= $pad;
1229 ######################################
1230 # The line fits, the match is accepted
1231 ######################################
1236 sub install_new_alignments {
1238 my ($new_line) = @_;
1240 my $jmax = $new_line->get_jmax();
1241 my $rfield_lengths = $new_line->get_rfield_lengths();
1242 my $col = $new_line->get_leading_space_count();
1244 for my $j ( 0 .. $jmax ) {
1245 $col += $rfield_lengths->[$j];
1247 # create initial alignments for the new group
1249 Perl::Tidy::VerticalAligner::Alignment->new( { column => $col } );
1250 $new_line->set_alignment( $j, $alignment );
1255 sub copy_old_alignments {
1256 my ( $new_line, $old_line ) = @_;
1257 my @new_alignments = $old_line->get_alignments();
1258 $new_line->set_alignments(@new_alignments);
1264 # debug routine to dump array contents
1266 print STDOUT "(@_)\n";
1272 # compute decrease in level when we remove $diff spaces from the
1274 my ( $self, $leading_space_count, $diff, $level ) = @_;
1276 my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
1277 if ($rOpts_indent_columns) {
1279 int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
1280 my $nlev = int( $leading_space_count / $rOpts_indent_columns );
1281 $level -= ( $olev - $nlev );
1282 if ( $level < 0 ) { $level = 0 }
1287 ###############################################
1288 # CODE SECTION 4: Code to process comment lines
1289 ###############################################
1291 sub _flush_comment_lines {
1293 # Output a group consisting of COMMENT lines
1296 my $rgroup_lines = $self->[_rgroup_lines_];
1297 return unless ( @{$rgroup_lines} );
1298 my $group_level = $self->[_group_level_];
1299 my $leading_space_count = $self->[_comment_leading_space_count_];
1300 my $leading_string =
1301 $self->get_leading_string( $leading_space_count, $group_level );
1303 # look for excessively long lines
1305 foreach my $item ( @{$rgroup_lines} ) {
1306 my ( $str, $str_len ) = @{$item};
1309 $leading_space_count -
1310 $self->maximum_line_length_for_level($group_level);
1311 if ( $excess > $max_excess ) {
1312 $max_excess = $excess;
1316 # zero leading space count if any lines are too long
1317 if ( $max_excess > 0 ) {
1318 $leading_space_count -= $max_excess;
1319 if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
1320 my $file_writer_object = $self->[_file_writer_object_];
1321 my $last_outdented_line_at =
1322 $file_writer_object->get_output_line_number();
1323 $self->[_last_outdented_line_at_] = $last_outdented_line_at;
1324 my $outdented_line_count = $self->[_outdented_line_count_];
1325 unless ($outdented_line_count) {
1326 $self->[_first_outdented_line_at_] = $last_outdented_line_at;
1328 my $nlines = @{$rgroup_lines};
1329 $outdented_line_count += $nlines;
1330 $self->[_outdented_line_count_] = $outdented_line_count;
1334 my $outdent_long_lines = 0;
1336 foreach my $item ( @{$rgroup_lines} ) {
1337 my ( $str, $str_len, $Kend ) = @{$item};
1338 $self->valign_output_step_B(
1340 leading_space_count => $leading_space_count,
1342 line_length => $str_len,
1343 side_comment_length => 0,
1344 outdent_long_lines => $outdent_long_lines,
1345 rvertical_tightness_flags => "",
1346 level => $group_level,
1347 level_end => $group_level,
1353 $self->initialize_for_new_group();
1357 ######################################################
1358 # CODE SECTION 5: Code to process groups of code lines
1359 ######################################################
1361 sub _flush_group_lines {
1363 # This is the vertical aligner internal flush, which leaves the cache
1365 my ( $self, $level_jump ) = @_;
1367 # $level_jump = $next_level-$group_level, if known
1368 # = undef if not known
1370 my $rgroup_lines = $self->[_rgroup_lines_];
1371 return unless ( @{$rgroup_lines} );
1372 my $group_type = $self->[_group_type_];
1373 my $group_level = $self->[_group_level_];
1377 my ( $a, $b, $c ) = caller();
1378 my $nlines = @{$rgroup_lines};
1380 "APPEND0: _flush_group_lines called from $a $b $c lines=$nlines, type=$group_type \n";
1383 ############################################
1384 # Section 1: Handle a group of COMMENT lines
1385 ############################################
1386 if ( $group_type eq 'COMMENT' ) {
1387 $self->_flush_comment_lines();
1391 #########################################################################
1392 # Section 2: Handle line(s) of CODE. Most of the actual work of vertical
1393 # aligning happens here in the following steps:
1394 #########################################################################
1396 # STEP 1: Remove most unmatched tokens. They block good alignments.
1397 my ( $max_lev_diff, $saw_side_comment ) =
1398 delete_unmatched_tokens( $rgroup_lines, $group_level );
1400 # STEP 2: Sweep top to bottom, forming subgroups of lines with exactly
1401 # matching common alignments. The indexes of these subgroups are in the
1403 my $rgroups = $self->sweep_top_down( $rgroup_lines, $group_level );
1405 # STEP 3: Sweep left to right through the lines, looking for leading
1406 # alignment tokens shared by groups.
1407 sweep_left_to_right( $rgroup_lines, $rgroups, $group_level )
1408 if ( @{$rgroups} > 1 );
1410 # STEP 4: Move side comments to a common column if possible.
1411 if ($saw_side_comment) {
1412 $self->align_side_comments( $rgroup_lines, $rgroups );
1415 # STEP 5: For the -lp option, increase the indentation of lists
1416 # to the desired amount, but do not exceed the line length limit.
1418 # We are allowed to shift a group of lines to the right if:
1419 # (1) its level is greater than the level of the previous group, and
1420 # (2) its level is greater than the level of the next line to be written.
1422 my $extra_indent_ok;
1423 if ( $group_level > $self->[_last_level_written_] ) {
1425 # Use the level jump to next line to come, if given
1426 if ( defined($level_jump) ) {
1427 $extra_indent_ok = $level_jump < 0;
1430 # Otherwise, assume the next line has the level of the end of last line.
1431 # This fixes case c008.
1433 my $level_end = $rgroup_lines->[-1]->get_level_end();
1434 $extra_indent_ok = $group_level > $level_end;
1438 my $extra_leading_spaces =
1440 ? get_extra_leading_spaces( $rgroup_lines, $rgroups )
1443 # STEP 6: Output the lines.
1444 # All lines in this batch have the same basic leading spacing:
1445 my $group_leader_length = $rgroup_lines->[0]->get_leading_space_count();
1447 foreach my $line ( @{$rgroup_lines} ) {
1448 $self->valign_output_step_A(
1453 group_leader_length => $group_leader_length,
1454 extra_leading_spaces => $extra_leading_spaces,
1455 level => $group_level,
1460 $self->initialize_for_new_group();
1464 { ## closure for sub sweep_top_down
1466 my $rall_lines; # all of the lines
1467 my $grp_level; # level of all lines
1468 my $rgroups; # describes the partition of lines we will make here
1469 my $group_line_count; # number of lines in current partition
1471 BEGIN { $rgroups = [] }
1473 sub initialize_for_new_rgroup {
1474 $group_line_count = 0;
1481 my $rline = $rall_lines->[$jend];
1484 if ( $group_line_count == 0 ) {
1485 install_new_alignments($rline);
1488 my $rvals = pop @{$rgroups};
1489 $jbeg = $rvals->[0];
1490 copy_old_alignments( $rline, $rall_lines->[$jbeg] );
1492 push @{$rgroups}, [ $jbeg, $jend, undef ];
1493 $group_line_count++;
1497 sub get_rgroup_jrange {
1499 return unless @{$rgroups};
1500 return unless ( $group_line_count > 0 );
1501 my ( $jbeg, $jend ) = @{ $rgroups->[-1] };
1502 return ( $jbeg, $jend );
1507 my ($imax_align) = @_;
1508 return unless @{$rgroups};
1509 return unless ( $group_line_count > 0 );
1511 my ( $jbeg, $jend ) = @{ pop @{$rgroups} };
1512 push @{$rgroups}, [ $jbeg, $jend, $imax_align ];
1514 # Undo some alignments of poor two-line combinations.
1515 # We had to wait until now to know the line count.
1516 if ( $jend - $jbeg == 1 ) {
1517 my $line_0 = $rall_lines->[$jbeg];
1518 my $line_1 = $rall_lines->[$jend];
1520 my $imax_pair = $line_1->get_imax_pair();
1521 if ( $imax_pair > $imax_align ) { $imax_align = $imax_pair }
1523 ## flag for possible future use:
1524 ## my $is_isolated_pair = $imax_pair < 0
1526 ## || $rall_lines->[ $jbeg - 1 ]->get_imax_pair() < 0 );
1529 $jbeg > 0 ? $rall_lines->[ $jbeg - 1 ]->get_imax_pair() : -1;
1531 my ( $is_marginal, $imax_align_fix ) =
1532 is_marginal_match( $line_0, $line_1, $grp_level, $imax_align,
1535 combine_fields( $line_0, $line_1, $imax_align_fix );
1539 initialize_for_new_rgroup();
1543 sub block_penultimate_match {
1545 # emergency reset to prevent sweep_left_to_right from trying to match a
1546 # failed terminal else match
1547 return unless @{$rgroups} > 1;
1548 $rgroups->[-2]->[2] = -1;
1552 sub sweep_top_down {
1553 my ( $self, $rlines, $group_level ) = @_;
1555 # Partition the set of lines into final alignment subgroups
1556 # and store the alignments with the lines.
1558 # The alignment subgroups we are making here are groups of consecutive
1559 # lines which have (1) identical alignment tokens and (2) do not
1560 # exceed the allowable maximum line length. A later sweep from
1561 # left-to-right ('sweep_lr') will handle additional alignments.
1563 # transfer args to closure variables
1564 $rall_lines = $rlines;
1565 $grp_level = $group_level;
1567 initialize_for_new_rgroup();
1568 return unless @{$rlines}; # shouldn't happen
1570 # Unset the _end_group flag for the last line if it it set because it
1571 # is not needed and can causes problems for -lp formatting
1572 $rall_lines->[-1]->set_end_group(0);
1574 # Loop over all lines ...
1576 foreach my $new_line ( @{$rall_lines} ) {
1579 # Start a new subgroup if necessary
1580 if ( !$group_line_count ) {
1581 add_to_rgroup($jline);
1582 if ( $new_line->get_end_group() ) {
1588 my $j_terminal_match = $new_line->get_j_terminal_match();
1589 my ( $jbeg, $jend ) = get_rgroup_jrange();
1590 if ( !defined($jbeg) ) {
1592 # safety check, shouldn't happen
1593 $self->warning(<<EOM);
1594 Program bug detected in Perl::Tidy::VerticalAligner sub sweep_top_down
1595 undefined index for group line count $group_line_count
1599 my $base_line = $rall_lines->[$jbeg];
1601 # Initialize a global flag saying if the last line of the group
1602 # should match end of group and also terminate the group. There
1603 # should be no returns between here and where the flag is handled
1605 my $col_matching_terminal = 0;
1606 if ( defined($j_terminal_match) ) {
1608 # remember the column of the terminal ? or { to match with
1609 $col_matching_terminal =
1610 $base_line->get_column($j_terminal_match);
1612 # Ignore an undefined value as a defensive step; shouldn't
1614 $col_matching_terminal = 0
1615 unless defined($col_matching_terminal);
1618 # -------------------------------------------------------------
1619 # Allow hanging side comment to join current group, if any. The
1620 # only advantage is to keep the other tokens in the same group. For
1621 # example, this would make the '=' align here:
1622 # $ax = 1; # side comment
1623 # # hanging side comment
1624 # $boondoggle = 5; # side comment
1625 # $beetle = 5; # side comment
1627 # here is another example..
1629 # _rtoc_name_count => {}, # hash to track ..
1630 # _rpackage_stack => [], # stack to check ..
1632 # _rlast_level => \$last_level, # brace indentation
1635 # If this were not desired, the next step could be skipped.
1636 # -------------------------------------------------------------
1637 if ( $new_line->get_is_hanging_side_comment() ) {
1638 join_hanging_comment( $new_line, $base_line );
1641 # If this line has no matching tokens, then flush out the lines
1642 # BEFORE this line unless both it and the previous line have side
1643 # comments. This prevents this line from pushing side coments out
1645 elsif ( $new_line->get_jmax() == 1 ) {
1647 # There are no matching tokens, so now check side comments.
1648 # Programming note: accessing arrays with index -1 is
1649 # risky in Perl, but we have verified there is at least one
1650 # line in the group and that there is at least one field.
1652 $rall_lines->[ $jline - 1 ]->get_rfields()->[-1];
1653 my $side_comment = $new_line->get_rfields()->[-1];
1654 end_rgroup(-1) unless ( $side_comment && $prev_comment );
1657 # See if the new line matches and fits the current group,
1658 # if it still exists. Flush the current group if not.
1660 if ($group_line_count) {
1661 ( $match_code, my $imax_align ) =
1662 $self->check_match( $new_line, $base_line,
1663 $rall_lines->[ $jline - 1 ] );
1664 if ( $match_code != 2 ) { end_rgroup($imax_align) }
1667 # Store the new line
1668 add_to_rgroup($jline);
1670 if ( defined($j_terminal_match) ) {
1672 # Decide if we should fix a terminal match. We can either:
1673 # 1. fix it and prevent the sweep_lr from changing it, or
1674 # 2. leave it alone and let sweep_lr try to fix it.
1676 # The current logic is to fix it if:
1677 # -it has not joined to previous lines,
1678 # -and either the previous subgroup has just 1 line, or
1679 # -this line matched but did not fit (so sweep won't work)
1681 if ( $group_line_count == 1 ) {
1682 $fixit ||= $match_code;
1684 if ( @{$rgroups} > 1 ) {
1685 my ( $jbegx, $jendx ) = @{ $rgroups->[-2] };
1686 my $nlines = $jendx - $jbegx + 1;
1687 $fixit ||= $nlines <= 1;
1693 $base_line = $new_line;
1694 my $col_now = $base_line->get_column($j_terminal_match);
1696 # Ignore an undefined value as a defensive step; shouldn't
1698 $col_now = 0 unless defined($col_now);
1700 my $pad = $col_matching_terminal - $col_now;
1701 my $padding_available =
1702 $base_line->get_available_space_on_right();
1703 if ( $col_now && $pad > 0 && $pad <= $padding_available ) {
1704 $base_line->increase_field_width( $j_terminal_match,
1708 # do not let sweep_left_to_right change an isolated 'else'
1709 if ( !$new_line->get_is_terminal_ternary() ) {
1710 block_penultimate_match();
1716 # end the group if we know we cannot match next line.
1717 elsif ( $new_line->get_end_group() ) {
1720 } ## end loop over lines
1729 my ( $line_m, $line, $imax_min ) = @_;
1732 # two isolated (list) lines
1733 # imax_min = number of common alignment tokens
1735 # $pad_max = maximum suggested pad distnce
1736 # = 0 if alignment not recommended
1737 # Note that this is only for two lines which do not have alignment tokens
1738 # in common with any other lines. It is intended for lists, but it might
1739 # also be used for two non-list lines with a common leading '='.
1741 # Allow alignment if the difference in the two unpadded line lengths
1742 # is not more than either line length. The idea is to avoid
1743 # aligning lines with very different field lengths, like these two:
1746 # 'VARCHAR', DBI::SQL_VARCHAR, undef, "'", "'", undef, 0, 1,
1747 # 1, 0, 0, 0, undef, 0, 0
1749 my $rfield_lengths = $line->get_rfield_lengths();
1750 my $rfield_lengths_m = $line_m->get_rfield_lengths();
1752 # Safety check - shouldn't happen
1754 unless $imax_min < @{$rfield_lengths} && $imax_min < @{$rfield_lengths_m};
1758 for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
1759 $lensum_m += $rfield_lengths_m->[$i];
1760 $lensum += $rfield_lengths->[$i];
1763 my ( $lenmin, $lenmax ) =
1764 $lensum >= $lensum_m ? ( $lensum_m, $lensum ) : ( $lensum, $lensum_m );
1767 if ( $line_m->get_list_type() && $line->get_list_type() ) {
1768 $patterns_match = 1;
1769 my $rpatterns_m = $line_m->get_rpatterns();
1770 my $rpatterns = $line->get_rpatterns();
1771 for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
1772 my $pat = $rpatterns->[$i];
1773 my $pat_m = $rpatterns_m->[$i];
1774 if ( $pat ne $pat_m ) { $patterns_match = 0; last }
1778 my $pad_max = $lenmax;
1779 if ( !$patterns_match && $lenmax > 2 * $lenmin ) { $pad_max = 0 }
1784 sub sweep_left_to_right {
1786 my ( $rlines, $rgroups, $group_level ) = @_;
1788 # So far we have divided the lines into groups having an equal number of
1789 # identical alignments. Here we are going to look for common leading
1790 # alignments between the different groups and align them when possible.
1791 # For example, the three lines below are in three groups because each line
1792 # has a different number of commas. In this routine we will sweep from
1793 # left to right, aligning the leading commas as we go, but stopping if we
1794 # hit the line length limit.
1796 # my ( $num, $numi, $numj, $xyza, $ka, $xyzb, $kb, $aff, $error );
1797 # my ( $i, $j, $error, $aff, $asum, $avec );
1798 # my ( $km, $area, $varea );
1800 # nothing to do if just one group
1801 my $ng_max = @{$rgroups} - 1;
1802 return unless ( $ng_max > 0 );
1804 ############################################################################
1805 # Step 1: Loop over groups to find all common leading alignment tokens
1806 ############################################################################
1810 my $imax; # index of maximum non-side-comment alignment token
1811 my $istop; # an optional stopping index
1812 my $jbeg; # starting line index
1813 my $jend; # ending line index
1824 # Look at neighboring pairs of groups and form a simple list
1825 # of all common leading alignment tokens. Foreach such match we
1826 # store [$i, $ng], where
1827 # $i = index of the token in the line (0,1,...)
1828 # $ng is the second of the two groups with this common token
1831 # Hash to hold the maximum alignment change for any group
1834 # a small number of columns
1838 foreach my $item ( @{$rgroups} ) {
1841 $istop_mm = $istop_m;
1843 # save _m values of previous group
1845 $rtokens_m = $rtokens;
1851 # Get values for this group. Note that we just have to use values for
1852 # one of the lines of the group since all members have the same
1854 ( $jbeg, $jend, $istop ) = @{$item};
1856 $line = $rlines->[$jbeg];
1857 $rtokens = $line->get_rtokens();
1858 $imax = $line->get_jmax() - 2;
1859 $istop = -1 unless ( defined($istop) );
1860 $istop = $imax if ( $istop > $imax );
1862 # Initialize on first group
1863 next if ( $ng == 0 );
1865 # Use the minimum index limit of the two groups
1866 my $imax_min = $imax > $imax_m ? $imax_m : $imax;
1868 # Also impose a limit if given.
1869 if ( $istop_m < $imax_min ) {
1870 $imax_min = $istop_m;
1873 # Special treatment of two one-line groups isolated from other lines,
1874 # unless they form a simple list or a terminal match. Otherwise the
1875 # alignment can look strange in some cases.
1876 my $list_type = $rlines->[$jbeg]->get_list_type();
1879 && $jend_m == $jbeg_m
1880 && ( $ng == 1 || $istop_mm < 0 )
1881 && ( $ng == $ng_max || $istop < 0 )
1882 && !$line->get_j_terminal_match()
1884 # Only do this for imperfect matches. This is normally true except
1885 # when two perfect matches cannot form a group because the line
1886 # length limit would be exceeded. In that case we can still try
1887 # to match as many alignments as possible.
1888 && ( $imax != $imax_m || $istop_m != $imax_m )
1892 # We will just align assignments and simple lists
1893 next unless ( $imax_min >= 0 );
1895 unless ( $rtokens->[0] =~ /^=\d/
1898 # In this case we will limit padding to a short distance. This
1899 # is a compromise to keep some vertical alignment but prevent large
1900 # gaps, which do not look good for just two lines.
1902 two_line_pad( $rlines->[$jbeg], $rlines->[$jbeg_m], $imax_min );
1903 next unless ($pad_max);
1905 $max_move{"$ng_m"} = $pad_max;
1906 $max_move{"$ng"} = $pad_max;
1909 # Loop to find all common leading tokens.
1910 if ( $imax_min >= 0 ) {
1911 foreach my $i ( 0 .. $imax_min ) {
1912 my $tok = $rtokens->[$i];
1913 my $tok_m = $rtokens_m->[$i];
1914 last if ( $tok ne $tok_m );
1915 push @icommon, [ $i, $ng, $tok ];
1919 return unless @icommon;
1921 ###########################################################
1922 # Step 2: Reorder and consolidate the list into a task list
1923 ###########################################################
1925 # We have to work first from lowest token index to highest, then by group,
1926 # sort our list first on token index then group number
1927 @icommon = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @icommon;
1929 # Make a task list of the form
1930 # [$i, ng_beg, $ng_end, $tok], ..
1932 # $i is the index of the token to be aligned
1933 # $ng_beg..$ng_end is the group range for this action
1935 my ( $i, $ng_end, $tok );
1936 foreach my $item (@icommon) {
1937 my $ng_last = $ng_end;
1939 ( $i, $ng_end, $tok ) = @{$item};
1940 my $ng_beg = $ng_end - 1;
1941 if ( defined($ng_last) && $ng_beg == $ng_last && $i == $i_last ) {
1942 my $var = pop(@todo);
1943 $ng_beg = $var->[1];
1945 my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
1946 push @todo, [ $i, $ng_beg, $ng_end, $raw_tok, $lev ];
1949 ###############################
1950 # Step 3: Execute the task list
1951 ###############################
1952 do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad,
1957 { ## closure for sub do_left_to_right_sweep
1959 my %is_good_alignment_token;
1963 # One of the most difficult aspects of vertical alignment is knowing
1964 # when not to align. Alignment can go from looking very nice to very
1965 # bad when overdone. In the sweep algorithm there are two special
1966 # cases where we may need to limit padding to a '$short_pad' distance
1967 # to avoid some very ugly formatting:
1969 # 1. Two isolated lines with partial alignment
1970 # 2. A 'tail-wag-dog' situation, in which a single terminal
1971 # line with partial alignment could cause a significant pad
1972 # increase in many previous lines if allowed to join the alignment.
1974 # For most alignment tokens, we will allow only a small pad to be
1975 # introduced (the hardwired $short_pad variable) . But for some 'good'
1976 # alignments we can be less restrictive.
1978 # These are 'good' alignments, which are allowed more padding:
1980 => = ? if unless or || {
1983 @is_good_alignment_token{@q} = (0) x scalar(@q);
1985 # Promote a few of these to 'best', with essentially no pad limit:
1986 $is_good_alignment_token{'='} = 1;
1987 $is_good_alignment_token{'if'} = 1;
1988 $is_good_alignment_token{'unless'} = 1;
1989 $is_good_alignment_token{'=>'} = 1
1991 # Note the hash values are set so that:
1992 # if ($is_good_alignment_token{$raw_tok}) => best
1993 # if defined ($is_good_alignment_token{$raw_tok}) => good or best
1997 sub do_left_to_right_sweep {
1998 my ( $rlines, $rgroups, $rtodo, $rmax_move, $short_pad, $group_level )
2001 # $blocking_level[$nj is the level at a match failure between groups
2004 my $group_list_type = $rlines->[0]->get_list_type();
2006 my $move_to_common_column = sub {
2008 # Move the alignment column of token $itok to $col_want for a
2009 # sequence of groups.
2010 my ( $ngb, $nge, $itok, $col_want, $raw_tok ) = @_;
2011 return unless ( defined($ngb) && $nge > $ngb );
2012 foreach my $ng ( $ngb .. $nge ) {
2014 my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
2015 my $line = $rlines->[$jbeg];
2016 my $col = $line->get_column($itok);
2017 my $avail = $line->get_available_space_on_right();
2018 my $move = $col_want - $col;
2021 # limit padding increase in isolated two lines
2023 if ( defined( $rmax_move->{$ng} )
2024 && $move > $rmax_move->{$ng}
2025 && !$is_good_alignment_token{$raw_tok} );
2027 $line->increase_field_width( $itok, $move );
2029 elsif ( $move < 0 ) {
2031 # spot to take special action on failure to move
2036 foreach my $task ( @{$rtodo} ) {
2037 my ( $itok, $ng_beg, $ng_end, $raw_tok, $lev ) = @{$task};
2039 # Nothing to do for a single group
2040 next unless ( $ng_end > $ng_beg );
2042 my $ng_first; # index of the first group of a continuous sequence
2043 my $col_want; # the common alignment column of a sequence of groups
2044 my $col_limit; # maximum column before bumping into max line length
2045 my $line_count_ng_m = 0;
2049 # Loop over the groups
2050 # 'ix_' = index in the array of lines
2051 # 'ng_' = index in the array of groups
2052 # 'it_' = index in the array of tokens
2053 my $ix_min = $rgroups->[$ng_beg]->[0];
2054 my $ix_max = $rgroups->[$ng_end]->[1];
2055 my $lines_total = $ix_max - $ix_min + 1;
2056 foreach my $ng ( $ng_beg .. $ng_end ) {
2057 my ( $ix_beg, $ix_end, $it_stop ) = @{ $rgroups->[$ng] };
2058 my $line_count_ng = $ix_end - $ix_beg + 1;
2060 # Important: note that since all lines in a group have a common
2061 # alignments object, we just have to work on one of the lines
2062 # (the first line). All of the rest will be changed
2064 my $line = $rlines->[$ix_beg];
2065 my $jmax = $line->get_jmax();
2067 # the maximum space without exceeding the line length:
2068 my $avail = $line->get_available_space_on_right();
2069 my $col = $line->get_column($itok);
2070 my $col_max = $col + $avail;
2072 # Initialize on first group
2073 if ( !defined($col_want) ) {
2076 $col_limit = $col_max;
2077 $line_count_ng_m = $line_count_ng;
2079 $it_stop_m = $it_stop;
2083 # RULE: Throw a blocking flag upon encountering a token level
2084 # different from the level of the first blocking token. For
2085 # example, in the following example, if the = matches get
2086 # blocked between two groups as shown, then we want to start
2087 # blocking matches at the commas, which are at deeper level, so
2088 # that we do not get the big gaps shown here:
2090 # my $unknown3 = pack( "v", -2 );
2091 # my $unknown4 = pack( "v", 0x09 );
2092 # my $unknown5 = pack( "VVV", 0x06, 0x00, 0x00 );
2093 # my $num_bbd_blocks = pack( "V", $num_lists );
2094 # my $root_startblock = pack( "V", $root_start );
2095 # my $unknown6 = pack( "VV", 0x00, 0x1000 );
2097 # On the other hand, it is okay to keep matching at the same
2098 # level such as in a simple list of commas and/or fat arrors.
2100 my $is_blocked = defined( $blocking_level[$ng] )
2101 && $lev > $blocking_level[$ng];
2103 # TAIL-WAG-DOG RULE: prevent a 'tail-wag-dog' syndrom, meaning:
2104 # Do not let one or two lines with a **different number of
2105 # alignments** open up a big gap in a large block. For
2106 # example, we will prevent something like this, where the first
2107 # line prys open the rest:
2109 # $worksheet->write( "B7", "http://www.perl.com", undef, $format );
2110 # $worksheet->write( "C7", "", $format );
2111 # $worksheet->write( "D7", "", $format );
2112 # $worksheet->write( "D8", "", $format );
2113 # $worksheet->write( "D8", "", $format );
2115 # We should exclude from consideration two groups which are
2116 # effectively the same but separated because one does not
2117 # fit in the maximum allowed line length.
2119 $jmax == $jmax_m && $it_stop_m == $jmax_m - 2;
2121 my $lines_above = $ix_beg - $ix_min;
2122 my $lines_below = $lines_total - $lines_above;
2124 # Increase the tolerable gap for certain favorable factors
2126 my $top_level = $lev == $group_level;
2128 # Align best top level alignment tokens like '=', 'if', ...
2129 # A factor of 10 allows a gap of up to 40 spaces
2130 if ( $top_level && $is_good_alignment_token{$raw_tok} ) {
2134 # Otherwise allow some minimal padding of good alignments
2137 defined( $is_good_alignment_token{$raw_tok} )
2139 # We have to be careful if there are just 2 lines. This
2140 # two-line factor allows large gaps only for 2 lines which
2141 # are simple lists with fewer items on the second line. It
2142 # gives results similar to previous versions of perltidy.
2143 && ( $lines_total > 2
2144 || $group_list_type && $jmax < $jmax_m && $top_level )
2154 if ( !$is_same_group ) {
2157 || $lines_above == 2 && $lines_below >= 4 )
2158 && $col_want > $col + $short_pad * $factor;
2161 || $lines_below == 2 && $lines_above >= 4 )
2162 && $col > $col_want + $short_pad * $factor;
2165 # if match is limited by gap size, stop aligning at this level
2167 $blocking_level[$ng] = $lev - 1;
2170 # quit and restart if it cannot join this batch
2171 if ( $col_want > $col_max
2172 || $col > $col_limit
2177 # remember the level of the first blocking token
2178 if ( !defined( $blocking_level[$ng] ) ) {
2179 $blocking_level[$ng] = $lev;
2182 $move_to_common_column->(
2183 $ng_first, $ng - 1, $itok, $col_want, $raw_tok
2187 $col_limit = $col_max;
2188 $line_count_ng_m = $line_count_ng;
2190 $it_stop_m = $it_stop;
2194 $line_count_ng_m += $line_count_ng;
2196 # update the common column and limit
2197 if ( $col > $col_want ) { $col_want = $col }
2198 if ( $col_max < $col_limit ) { $col_limit = $col_max }
2200 } ## end loop over groups
2202 if ( $ng_end > $ng_first ) {
2203 $move_to_common_column->(
2204 $ng_first, $ng_end, $itok, $col_want, $raw_tok
2206 } ## end loop over groups for one task
2207 } ## end loop over tasks
2213 sub delete_selected_tokens {
2215 my ( $line_obj, $ridel ) = @_;
2217 # $line_obj is the line to be modified
2218 # $ridel is a ref to list of indexes to be deleted
2220 # remove an unused alignment token(s) to improve alignment chances
2222 return unless ( defined($line_obj) && defined($ridel) && @{$ridel} );
2224 my $jmax_old = $line_obj->get_jmax();
2225 my $rfields_old = $line_obj->get_rfields();
2226 my $rfield_lengths_old = $line_obj->get_rfield_lengths();
2227 my $rpatterns_old = $line_obj->get_rpatterns();
2228 my $rtokens_old = $line_obj->get_rtokens();
2229 my $j_terminal_match = $line_obj->get_j_terminal_match();
2231 use constant EXPLAIN_DELETE_SELECTED => 0;
2234 EXPLAIN_DELETE_SELECTED && print <<EOM;
2235 delete indexes: <@{$ridel}>
2237 old tokens: <@{$rtokens_old}>
2238 old patterns: <@{$rpatterns_old}>
2239 old fields: <@{$rfields_old}>
2240 old field_lengths: <@{$rfield_lengths_old}>
2243 my $rfields_new = [];
2244 my $rpatterns_new = [];
2245 my $rtokens_new = [];
2246 my $rfield_lengths_new = [];
2248 # Convert deletion list to a hash to allow any order, multiple entries,
2249 # and avoid problems with index values out of range
2251 @delete_me{ @{$ridel} } = (1) x scalar( @{$ridel} );
2253 my $pattern = $rpatterns_old->[0];
2254 my $field = $rfields_old->[0];
2255 my $field_length = $rfield_lengths_old->[0];
2256 push @{$rfields_new}, $field;
2257 push @{$rfield_lengths_new}, $field_length;
2258 push @{$rpatterns_new}, $pattern;
2260 # Loop to either copy items or concatenate fields and patterns
2262 for ( my $j = 0 ; $j < $jmax_old ; $j++ ) {
2263 my $token = $rtokens_old->[$j];
2264 my $field = $rfields_old->[ $j + 1 ];
2265 my $field_length = $rfield_lengths_old->[ $j + 1 ];
2266 my $pattern = $rpatterns_old->[ $j + 1 ];
2267 if ( !$delete_me{$j} ) {
2268 push @{$rtokens_new}, $token;
2269 push @{$rfields_new}, $field;
2270 push @{$rpatterns_new}, $pattern;
2271 push @{$rfield_lengths_new}, $field_length;
2274 if ( !defined($jmin_del) ) { $jmin_del = $j }
2275 $rfields_new->[-1] .= $field;
2276 $rfield_lengths_new->[-1] += $field_length;
2277 $rpatterns_new->[-1] .= $pattern;
2281 # ----- x ------ x ------ x ------
2282 #t 0 1 2 <- token indexing
2283 #f 0 1 2 3 <- field and pattern
2285 my $jmax_new = @{$rfields_new} - 1;
2286 $line_obj->set_rtokens($rtokens_new);
2287 $line_obj->set_rpatterns($rpatterns_new);
2288 $line_obj->set_rfields($rfields_new);
2289 $line_obj->set_rfield_lengths($rfield_lengths_new);
2290 $line_obj->set_jmax($jmax_new);
2292 # The value of j_terminal_match will be incorrect if we delete tokens prior
2293 # to it. We will have to give up on aligning the terminal tokens if this
2295 if ( defined($j_terminal_match) && $jmin_del <= $j_terminal_match ) {
2296 $line_obj->set_j_terminal_match(undef);
2299 # update list type -
2300 if ( $line_obj->get_list_seqno() ) {
2302 ## This works, but for efficiency see if we need to make a change:
2303 ## decide_if_list($line_obj);
2305 # An existing list will still be a list but with possibly different
2307 my $old_list_type = $line_obj->get_list_type();
2308 my $new_list_type = "";
2309 if ( $rtokens_new->[0] =~ /^(=>|,)/ ) {
2310 $new_list_type = $rtokens_new->[0];
2312 if ( !$old_list_type || $old_list_type ne $new_list_type ) {
2313 decide_if_list($line_obj);
2317 EXPLAIN_DELETE_SELECTED && print <<EOM;
2320 new tokens: <@{$rtokens_new}>
2321 new patterns: <@{$rpatterns_new}>
2322 new fields: <@{$rfields_new}>
2327 { ## closure for sub decode_alignment_token
2329 # This routine is called repeatedly for each token, so it needs to be
2330 # efficient. We can speed things up by remembering the inputs and outputs
2334 sub initialize_decode {
2336 # We will re-initialize the hash for each file. Otherwise, there is
2337 # a danger that the hash can become arbitrarily large if a very large
2338 # number of files is processed at once.
2339 %decoded_token = ();
2343 sub decode_alignment_token {
2345 # Unpack the values packed in an alignment token
2348 # my ( $raw_tok, $lev, $tag, $tok_count ) =
2349 # decode_alignment_token($token);
2351 # Alignment tokens have a trailing decimal level and optional tag (for
2353 # For example, the first comma in the following line
2354 # sub banner { crlf; report( shift, '/', shift ); crlf }
2355 # is decorated as follows:
2356 # ,2+report-6 => (tok,lev,tag) =qw( , 2 +report-6)
2358 # An optional token count may be appended with a leading dot.
2359 # Currently this is only done for '=' tokens but this could change.
2360 # For example, consider the following line:
2361 # $nport = $port = shift || $name;
2362 # The first '=' may either be '=0' or '=0.1' [level 0, first equals]
2363 # The second '=' will be '=0.2' [level 0, second equals]
2366 if ( defined( $decoded_token{$tok} ) ) {
2367 return @{ $decoded_token{$tok} };
2370 my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, "", 1 );
2371 if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) {
2375 $tok_count = $5 if ($5);
2377 my @vals = ( $raw_tok, $lev, $tag, $tok_count );
2378 $decoded_token{$tok} = \@vals;
2383 { ## closure for sub delete_unmatched_tokens
2386 my %keep_after_deleted_assignment;
2392 = **= += *= &= <<= &&=
2393 -= /= |= >>= ||= //=
2397 @is_assignment{@q} = (1) x scalar(@q);
2399 # These tokens may be kept following an = deletion
2403 @keep_after_deleted_assignment{@q} = (1) x scalar(@q);
2407 # This flag is for testing only and should normally be zero.
2408 use constant TEST_DELETE_NULL => 0;
2410 sub delete_unmatched_tokens {
2411 my ( $rlines, $group_level ) = @_;
2413 # This is a preliminary step in vertical alignment in which we remove
2414 # as many obviously un-needed alignment tokens as possible. This will
2415 # prevent them from interfering with the final alignment.
2417 # These are the return values
2418 my $max_lev_diff = 0; # used to avoid a call to prune_tree
2419 my $saw_side_comment = 0; # used to avoid a call for side comments
2421 # Handle no lines -- shouldn't happen
2422 return unless @{$rlines};
2424 # Handle a single line
2425 if ( @{$rlines} == 1 ) {
2426 my $line = $rlines->[0];
2427 my $jmax = $line->get_jmax();
2428 my $length = $line->get_rfield_lengths()->[$jmax];
2429 $saw_side_comment = $length > 0;
2430 return ( $max_lev_diff, $saw_side_comment );
2433 my $has_terminal_match = $rlines->[-1]->get_j_terminal_match();
2435 # ignore hanging side comments in these operations
2436 my @filtered = grep { !$_->get_is_hanging_side_comment() } @{$rlines};
2437 my $rnew_lines = \@filtered;
2439 $saw_side_comment = @filtered != @{$rlines};
2442 # nothing to do if all lines were hanging side comments
2443 my $jmax = @{$rnew_lines} - 1;
2444 return ( $max_lev_diff, $saw_side_comment ) unless ( $jmax >= 0 );
2450 # create a hash of tokens for each line
2451 my $rline_hashes = [];
2452 foreach my $line ( @{$rnew_lines} ) {
2454 my $rtokens = $line->get_rtokens();
2455 my $rpatterns = $line->get_rpatterns();
2457 my ( $i_eq, $tok_eq, $pat_eq );
2458 my ( $lev_min, $lev_max );
2459 foreach my $tok ( @{$rtokens} ) {
2460 my ( $raw_tok, $lev, $tag, $tok_count ) =
2461 decode_alignment_token($tok);
2463 if ( $tok ne '#' ) {
2464 if ( !defined($lev_min) ) {
2469 if ( $lev < $lev_min ) { $lev_min = $lev }
2470 if ( $lev > $lev_max ) { $lev_max = $lev }
2474 if ( !$saw_side_comment ) {
2475 my $length = $line->get_rfield_lengths()->[ $i + 1 ];
2476 $saw_side_comment ||= $length;
2480 # Possible future upgrade: for multiple matches,
2481 # record [$i1, $i2, ..] instead of $i
2483 [ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ];
2485 # remember the first equals at line level
2486 if ( !defined($i_eq) && $raw_tok eq '=' ) {
2488 if ( $lev eq $group_level ) {
2491 $pat_eq = $rpatterns->[$i];
2496 push @{$rline_hashes}, $rhash;
2497 push @equals_info, [ $i_eq, $tok_eq, $pat_eq ];
2498 push @line_info, [ $lev_min, $lev_max ];
2499 if ( defined($lev_min) ) {
2500 my $lev_diff = $lev_max - $lev_min;
2501 if ( $lev_diff > $max_lev_diff ) { $max_lev_diff = $lev_diff }
2505 # compare each line pair and record matches
2508 for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
2512 my $rhash_l = $rline_hashes->[$jl];
2513 my $rhash_r = $rline_hashes->[$jr];
2514 my $count = 0; # UNUSED NOW?
2516 foreach my $tok ( keys %{$rhash_l} ) {
2518 if ( defined( $rhash_r->{$tok} ) ) {
2519 if ( $tok ne '#' ) { $count++; }
2520 my $il = $rhash_l->{$tok}->[0];
2521 my $ir = $rhash_r->{$tok}->[0];
2522 $rhash_l->{$tok}->[2] = $ir;
2523 $rhash_r->{$tok}->[1] = $il;
2524 if ( $tok ne '#' ) {
2525 push @{ $rtok_hash->{$tok} }, ( $jl, $jr );
2531 # Set a line break if no matching tokens between these lines
2532 # (this is not strictly necessary now but does not hurt)
2533 if ( $nr == 0 && $nl > 0 ) {
2534 $rnew_lines->[$jl]->set_end_group(1);
2537 # Also set a line break if both lines have simple equals but with
2538 # different leading characters in patterns. This check is similar
2539 # to one in sub check_match, and will prevent sub
2540 # prune_alignment_tree from removing alignments which otherwise
2541 # should be kept. This fix is rarely needed, but it can
2542 # occasionally improve formatting.
2544 # my $name = $this->{Name};
2545 # $type = $this->ctype($genlooptype) if defined $genlooptype;
2546 # my $declini = ( $asgnonly ? "" : "\t$type *" );
2547 # my $cast = ( $type ? "($type *)" : "" );
2548 # The last two lines start with 'my' and will not match the
2549 # previous line starting with $type, so we do not want
2550 # prune_alignment tree to delete their ? : alignments at a deeper
2552 my ( $i_eq_l, $tok_eq_l, $pat_eq_l ) = @{ $equals_info[$jl] };
2553 my ( $i_eq_r, $tok_eq_r, $pat_eq_r ) = @{ $equals_info[$jr] };
2554 if ( defined($i_eq_l) && defined($i_eq_r) ) {
2556 # Also, do not align equals across a change in ci level
2557 my $ci_jump = $rnew_lines->[$jl]->get_ci_level() !=
2558 $rnew_lines->[$jr]->get_ci_level();
2561 $tok_eq_l eq $tok_eq_r
2564 && ( substr( $pat_eq_l, 0, 1 ) ne substr( $pat_eq_r, 0, 1 )
2568 $rnew_lines->[$jl]->set_end_group(1);
2575 push @subgroups, [ 0, $jmax ];
2576 for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
2577 if ( $rnew_lines->[$jl]->get_end_group() ) {
2578 $subgroups[-1]->[1] = $jl;
2579 push @subgroups, [ $jl + 1, $jmax ];
2583 # flag to allow skipping pass 2
2584 my $saw_large_group;
2586 ############################################################
2587 # PASS 1 over subgroups to remove unmatched alignment tokens
2588 ############################################################
2589 foreach my $item (@subgroups) {
2590 my ( $jbeg, $jend ) = @{$item};
2592 my $nlines = $jend - $jbeg + 1;
2594 ####################################################
2595 # Look for complete if/elsif/else and ternary blocks
2596 ####################################################
2598 # We are looking for a common '$dividing_token' like these:
2600 # if ( $b and $s ) { $p->{'type'} = 'a'; }
2601 # elsif ($b) { $p->{'type'} = 'b'; }
2602 # elsif ($s) { $p->{'type'} = 's'; }
2603 # else { $p->{'type'} = ''; }
2604 # ^----------- dividing_token
2607 # !$routine ? '[PFX]'
2608 # : $routine =~ /warn.*_d\z/ ? '[DS]'
2609 # : $routine =~ /ck_warn/ ? 'W'
2610 # : $routine =~ /ckWARN\d*reg_d/ ? 'S'
2611 # : $routine =~ /ckWARN\d*reg/ ? 'W'
2612 # : $routine =~ /vWARN\d/ ? '[WDS]'
2614 # ^----------- dividing_token
2616 # Only look for groups which are more than 2 lines long. Two lines
2617 # can get messed up doing this, probably due to the various
2621 my %token_line_count;
2622 if ( $nlines > 2 ) {
2624 for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
2626 my $line = $rnew_lines->[$jj];
2627 my $rtokens = $line->get_rtokens();
2628 foreach my $tok ( @{$rtokens} ) {
2629 if ( !$seen{$tok} ) {
2631 $token_line_count{$tok}++;
2636 foreach my $tok ( keys %token_line_count ) {
2637 if ( $token_line_count{$tok} == $nlines ) {
2638 if ( substr( $tok, 0, 1 ) eq '?'
2639 || substr( $tok, 0, 1 ) eq '{'
2640 && $tok =~ /^\{\d+if/ )
2642 $dividing_token = $tok;
2649 #####################################################
2650 # Loop over lines to remove unwanted alignment tokens
2651 #####################################################
2652 for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
2653 my $line = $rnew_lines->[$jj];
2654 my $rtokens = $line->get_rtokens();
2655 my $rhash = $rline_hashes->[$jj];
2656 my $i_eq = $equals_info[$jj]->[0];
2658 my $imax = @{$rtokens} - 2;
2659 my $delete_above_level;
2660 my $deleted_assignment_token;
2662 my $saw_dividing_token = "";
2663 $saw_large_group ||= $nlines > 2 && $imax > 1;
2665 # Loop over all alignment tokens
2666 for ( my $i = 0 ; $i <= $imax ; $i++ ) {
2667 my $tok = $rtokens->[$i];
2668 next if ( $tok eq '#' ); # shouldn't happen
2669 my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) =
2670 @{ $rhash->{$tok} };
2672 #######################################################
2673 # Here is the basic RULE: remove an unmatched alignment
2674 # which does not occur in the surrounding lines.
2675 #######################################################
2676 my $delete_me = !defined($il) && !defined($ir);
2678 # But now we modify this with exceptions...
2680 # EXCEPTION 1: If we are in a complete ternary or
2681 # if/elsif/else group, and this token is not on every line
2682 # of the group, should we delete it to preserve overall
2684 if ($dividing_token) {
2685 if ( $token_line_count{$tok} >= $nlines ) {
2686 $saw_dividing_token ||= $tok eq $dividing_token;
2690 # For shorter runs, delete toks to save alignment.
2691 # For longer runs, keep toks after the '{' or '?'
2692 # to allow sub-alignments within braces. The
2693 # number 5 lines is arbitrary but seems to work ok.
2695 ( $nlines < 5 || !$saw_dividing_token );
2699 # EXCEPTION 2: Remove all tokens above a certain level
2700 # following a previous deletion. For example, we have to
2701 # remove tagged higher level alignment tokens following a
2702 # '=>' deletion because the tags of higher level tokens
2703 # will now be incorrect. For example, this will prevent
2704 # aligning commas as follows after deleting the second '=>'
2706 # ListBox => origin => [ 270, 160 ],
2707 # size => [ 200, 55 ],
2709 if ( defined($delete_above_level) ) {
2710 if ( $lev > $delete_above_level ) {
2711 $delete_me ||= 1; #$tag;
2713 else { $delete_above_level = undef }
2716 # EXCEPTION 3: Remove all but certain tokens after an
2717 # assignment deletion.
2719 $deleted_assignment_token
2720 && ( $lev > $group_level
2721 || !$keep_after_deleted_assignment{$raw_tok} )
2727 # EXCEPTION 4: Do not touch the first line of a 2 line
2728 # terminal match, such as below, because j_terminal has
2730 # if ($tag) { $tago = "<$tag>"; $tagc = "</$tag>"; }
2731 # else { $tago = $tagc = ''; }
2732 # But see snippets 'else1.t' and 'else2.t'
2735 && $has_terminal_match
2738 # EXCEPTION 5: misc additional rules for commas and equals
2741 # okay to delete second and higher copies of a token
2742 if ( $tok_count == 1 ) {
2745 if ( $raw_tok eq ',' ) {
2747 # Do not delete commas before an equals
2749 if ( defined($i_eq) && $i < $i_eq );
2751 # Do not delete line-level commas
2752 $delete_me = 0 if ( $lev <= $group_level );
2755 # For an assignment at group level..
2756 if ( $is_assignment{$raw_tok}
2757 && $lev == $group_level )
2760 # Do not delete if it is the last alignment of
2761 # multiple tokens; this will prevent some
2762 # undesirable alignments
2763 if ( $imax > 0 && $i == $imax ) {
2767 # Otherwise, set a flag to delete most
2769 else { $deleted_assignment_token = $raw_tok }
2774 #####################################
2775 # Add this token to the deletion list
2776 #####################################
2780 # update deletion propagation flags
2781 if ( !defined($delete_above_level)
2782 || $lev < $delete_above_level )
2785 # delete all following higher level alignments
2786 $delete_above_level = $lev;
2788 # but keep deleting after => to next lower level
2789 # to avoid some bizarre alignments
2790 if ( $raw_tok eq '=>' ) {
2791 $delete_above_level = $lev - 1;
2795 } # End loop over alignment tokens
2797 # Process all deletion requests for this line
2799 delete_selected_tokens( $line, \@idel );
2801 } # End loopover lines
2802 } # End loop over subgroups
2804 #################################################
2805 # PASS 2 over subgroups to remove null alignments
2806 #################################################
2808 # This pass is only used for testing. It is helping to identify
2809 # alignment situations which might be improved with a future more
2810 # general algorithm which adds a tail matching capability.
2811 if (TEST_DELETE_NULL) {
2812 delete_null_alignments( $rnew_lines, $rline_hashes, \@subgroups )
2813 if ($saw_large_group);
2816 # PASS 3: Construct a tree of matched lines and delete some small deeper
2817 # levels of tokens. They also block good alignments.
2818 prune_alignment_tree($rnew_lines) if ($max_lev_diff);
2820 # PASS 4: compare all lines for common tokens
2821 match_line_pairs( $rlines, $rnew_lines, \@subgroups, $group_level );
2823 return ( $max_lev_diff, $saw_side_comment );
2827 sub delete_null_alignments {
2828 my ( $rnew_lines, $rline_hashes, $rsubgroups ) = @_;
2830 # This is an optional second pass for deleting alignment tokens which can
2831 # occasionally improve alignment. We look for and remove 'null
2832 # alignments', which are alignments that require no padding. So we can
2833 # 'cheat' and delete them. For example, notice the '=~' alignment in the
2834 # first two lines of the following code:
2836 # $sysname .= 'del' if $self->label =~ /deletion/;
2837 # $sysname .= 'ins' if $self->label =~ /insertion/;
2838 # $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
2840 # These '=~' tokens are already aligned because they are both the same
2841 # distance from the previous alignment token, the 'if'. So we can
2842 # eliminate them as alignments. The advantage is that in some cases, such
2843 # as this one, this will allow other tokens to be aligned. In this case we
2844 # then get the 'if' tokens to align:
2846 # $sysname .= 'del' if $self->label =~ /deletion/;
2847 # $sysname .= 'ins' if $self->label =~ /insertion/;
2848 # $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
2850 # The following rules for limiting this operation have been found to
2851 # work well and avoid problems:
2853 # Rule 1. We only consider a sequence of lines which have the same
2854 # sequence of alignment tokens.
2856 # Rule 2. We never eliminate the first alignment token. One reason is that
2857 # lines may have different leading indentation spaces, so keeping the
2858 # first alignment token insures that our length measurements start at
2859 # a well-defined point. Another reason is that nothing is gained because
2860 # the left-to-right sweep can always handle alignment of this token.
2862 # Rule 3. We require that the first alignment token exist in either
2863 # a previous line or a subsequent line. The reason is that this avoids
2864 # changing two-line matches which go through special logic.
2866 # Rule 4. Do not delete a token which occurs in a previous or subsequent
2867 # line. For example, in the above example, it was ok to eliminate the '=~'
2868 # token from two lines because it did not occur in a surrounding line.
2869 # If it did occur in a surrounding line, the result could be confusing
2870 # or even incorrectly aligned.
2872 # A consequence of these rules is that we only need to consider subgroups
2873 # with at least 3 lines and 2 alignment tokens.
2875 # The subgroup line index range
2876 my ( $jbeg, $jend );
2878 # Vars to keep track of the start of a current sequence of matching
2881 my $rfield_lengths_match;
2887 # Vars for a line being tested
2892 my $start_match = sub {
2894 $rtokens_match = $rtokens;
2895 $rfield_lengths_match = $rfield_lengths;
2898 $imax_match = $imax;
2903 my $add_to_match = sub {
2907 # Keep track of any padding that would be needed for each token
2908 for ( my $i = 0 ; $i <= $imax ; $i++ ) {
2909 next if ( $rneed_pad->[$i] );
2910 my $length = $rfield_lengths->[$i];
2911 my $length_match = $rfield_lengths_match->[$i];
2912 if ( $length ne $length_match ) { $rneed_pad->[$i] = 1 }
2916 my $end_match = sub {
2917 return unless ( $j_match_end > $j_match_beg );
2918 my $nlines = $j_match_end - $j_match_beg + 1;
2919 my $rhash_beg = $rline_hashes->[$j_match_beg];
2920 my $rhash_end = $rline_hashes->[$j_match_end];
2923 # Do not delete unless the first token also occurs in a surrounding line
2924 my $tok0 = $rtokens_match->[0];
2928 $j_match_beg > $jbeg
2929 && $rnew_lines->[ $j_match_beg - 1 ]->get_rtokens()->[0] eq
2932 || ( $j_match_end < $jend
2933 && $rnew_lines->[ $j_match_end + 1 ]->get_rtokens()->[0] eq
2937 # Note that we are skipping the token at i=0
2938 for ( my $i = 1 ; $i <= $imax_match ; $i++ ) {
2940 # do not delete a token which requires padding to align
2941 next if ( $rneed_pad->[$i] );
2943 my $tok = $rtokens_match->[$i];
2945 # Do not delete a token which occurs in a surrounding line
2947 if ( $j_match_beg > $jbeg
2948 && defined( $rline_hashes->[ $j_match_beg - 1 ]->{$tok} ) );
2950 if ( $j_match_end < $jend
2951 && defined( $rline_hashes->[ $j_match_end + 1 ]->{$tok} ) );
2955 ##print "ok to delete tok=$tok\n";
2958 foreach my $j ( $j_match_beg .. $j_match_end ) {
2959 delete_selected_tokens( $rnew_lines->[$j], \@idel );
2964 foreach my $item ( @{$rsubgroups} ) {
2965 ( $jbeg, $jend ) = @{$item};
2966 my $nlines = $jend - $jbeg + 1;
2967 next unless ( $nlines > 2 );
2969 for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
2970 my $line = $rnew_lines->[$jj];
2971 $rtokens = $line->get_rtokens();
2972 $rfield_lengths = $line->get_rfield_lengths();
2973 $imax = @{$rtokens} - 2;
2975 # start a new match group
2976 if ( $jj == $jbeg ) {
2977 $start_match->($jj);
2981 # see if all tokens of this line match the current group
2983 if ( $imax == $imax_match ) {
2984 for ( my $i = 0 ; $i <= $imax ; $i++ ) {
2985 my $tok = $rtokens->[$i];
2986 my $tok_match = $rtokens_match->[$i];
2987 last if ( $tok ne $tok_match );
2992 # yes, they all match
2994 $add_to_match->($jj);
2997 # now, this line does not match
3000 $start_match->($jj);
3002 } # End loopover lines
3004 } # End loop over subgroups
3006 } ## end sub delete_null_alignments
3008 sub match_line_pairs {
3009 my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_;
3011 # Compare each pair of lines and save information about common matches
3012 # $rlines = list of lines including hanging side comments
3013 # $rnew_lines = list of lines without any hanging side comments
3014 # $rsubgroups = list of subgroups of the new lines
3017 # Maybe change: imax_pair => pair_match_info = ref to array
3018 # = [$imax_align, $rMsg, ... ]
3019 # This may eventually have multi-level match info
3021 # Previous line vars
3022 my ( $line_m, $rtokens_m, $rpatterns_m, $rfield_lengths_m, $imax_m,
3023 $list_type_m, $ci_level_m );
3026 my ( $line, $rtokens, $rpatterns, $rfield_lengths, $imax, $list_type,
3029 use constant EXPLAIN_COMPARE_PATTERNS => 0;
3031 my $compare_patterns = sub {
3033 # helper routine to decide if patterns match well enough..
3035 # 0 = patterns match, continue
3037 # 2 = no match, and lines do not match at all
3039 my ( $tok, $tok_m, $pat, $pat_m, $pad ) = @_;
3041 my $return_code = 1;
3043 my ( $alignment_token, $lev, $tag, $tok_count ) =
3044 decode_alignment_token($tok);
3046 # We have to be very careful about aligning commas
3047 # when the pattern's don't match, because it can be
3048 # worse to create an alignment where none is needed
3049 # than to omit one. Here's an example where the ','s
3050 # are not in named containers. The first line below
3051 # should not match the next two:
3052 # ( $a, $b ) = ( $b, $r );
3053 # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
3054 # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
3055 if ( $alignment_token eq ',' ) {
3057 # do not align commas unless they are in named
3059 $GoToMsg = "do not align commas in unnamed containers";
3060 goto NO_MATCH unless ( $tok =~ /[A-Za-z]/ );
3063 # do not align parens unless patterns match;
3064 # large ugly spaces can occur in math expressions.
3065 elsif ( $alignment_token eq '(' ) {
3067 # But we can allow a match if the parens don't
3068 # require any padding.
3069 $GoToMsg = "do not align '(' unless patterns match or pad=0";
3070 if ( $pad != 0 ) { goto NO_MATCH }
3073 # Handle an '=' alignment with different patterns to
3075 elsif ( $alignment_token eq '=' ) {
3077 # It is best to be a little restrictive when
3078 # aligning '=' tokens. Here is an example of
3079 # two lines that we will not align:
3082 # The problem is that one is a 'my' declaration,
3083 # and the other isn't, so they're not very similar.
3084 # We will filter these out by comparing the first
3085 # letter of the pattern. This is crude, but works
3087 if ( substr( $pat_m, 0, 1 ) ne substr( $pat, 0, 1 ) ) {
3088 $GoToMsg = "first character before equals differ";
3092 # The introduction of sub 'prune_alignment_tree'
3093 # enabled alignment of lists left of the equals with
3094 # other scalar variables. For example:
3095 # my ( $D, $s, $e ) = @_;
3096 # my $d = length $D;
3097 # my $c = $e - $s - $d;
3099 # But this would change formatting of a lot of scripts,
3100 # so for now we prevent alignment of comma lists on the
3101 # left with scalars on the left. We will also prevent
3102 # any partial alignments.
3104 # set return code 2 if the = is at line level, but
3105 # set return code 1 if the = is below line level, i.e.
3106 # sub new { my ( $p, $v ) = @_; bless \$v, $p }
3107 # sub iter { my ($x) = @_; return undef if $$x < 0; return $$x--; }
3110 ( index( $pat_m, ',' ) >= 0 ) ne ( index( $pat, ',' ) >= 0 ) )
3112 $GoToMsg = "mixed commas/no-commas before equals";
3113 if ( $lev eq $group_level ) {
3121 return ( 0, \$GoToMsg );
3125 EXPLAIN_COMPARE_PATTERNS
3126 && print STDERR "no match because $GoToMsg\n";
3128 return ( $return_code, \$GoToMsg );
3130 }; ## end of $compare_patterns->()
3132 # loop over subgroups
3133 foreach my $item ( @{$rsubgroups} ) {
3134 my ( $jbeg, $jend ) = @{$item};
3135 my $nlines = $jend - $jbeg + 1;
3136 next unless ( $nlines > 1 );
3138 # loop over lines in a subgroup
3139 for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
3142 $rtokens_m = $rtokens;
3143 $rpatterns_m = $rpatterns;
3144 $rfield_lengths_m = $rfield_lengths;
3146 $list_type_m = $list_type;
3147 $ci_level_m = $ci_level;
3149 $line = $rnew_lines->[$jj];
3150 $rtokens = $line->get_rtokens();
3151 $rpatterns = $line->get_rpatterns();
3152 $rfield_lengths = $line->get_rfield_lengths();
3153 $imax = @{$rtokens} - 2;
3154 $list_type = $line->get_list_type();
3155 $ci_level = $line->get_ci_level();
3157 # nothing to do for first line
3158 next if ( $jj == $jbeg );
3160 my $ci_jump = $ci_level - $ci_level_m;
3162 my $imax_min = $imax_m < $imax ? $imax_m : $imax;
3164 my $imax_align = -1;
3166 # find number of leading common tokens
3168 #################################
3169 # No match to hanging side comment
3170 #################################
3171 if ( $line->get_is_hanging_side_comment() ) {
3173 # Should not get here; HSC's have been filtered out
3177 ##############################
3178 # Handle comma-separated lists
3179 ##############################
3180 elsif ( $list_type && $list_type eq $list_type_m ) {
3182 # do not align lists across a ci jump with new list method
3183 if ($ci_jump) { $imax_min = -1 }
3185 my $i_nomatch = $imax_min + 1;
3186 for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
3187 my $tok = $rtokens->[$i];
3188 my $tok_m = $rtokens_m->[$i];
3189 if ( $tok ne $tok_m ) {
3195 $imax_align = $i_nomatch - 1;
3202 my $i_nomatch = $imax_min + 1;
3203 for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
3204 my $tok = $rtokens->[$i];
3205 my $tok_m = $rtokens_m->[$i];
3206 if ( $tok ne $tok_m ) {
3211 my $pat = $rpatterns->[$i];
3212 my $pat_m = $rpatterns_m->[$i];
3214 # If patterns don't match, we have to be careful...
3215 if ( $pat_m ne $pat ) {
3217 $rfield_lengths->[$i] - $rfield_lengths_m->[$i];
3218 my ( $match_code, $rmsg ) = $compare_patterns->(
3219 $tok, $tok_m, $pat, $pat_m, $pad
3222 if ( $match_code eq 1 ) { $i_nomatch = $i }
3223 elsif ( $match_code eq 2 ) { $i_nomatch = 0 }
3228 $imax_align = $i_nomatch - 1;
3231 $line_m->set_imax_pair($imax_align);
3233 } ## end loop over lines
3235 # Put fence at end of subgroup
3236 $line->set_imax_pair(-1);
3238 } ## end loop over subgroups
3240 # if there are hanging side comments, propagate the pair info down to them
3241 # so that lines can just look back one line for their pair info.
3242 if ( @{$rlines} > @{$rnew_lines} ) {
3243 my $last_pair_info = -1;
3244 foreach my $line ( @{$rlines} ) {
3245 if ( $line->get_is_hanging_side_comment() ) {
3246 $line->set_imax_pair($last_pair_info);
3249 $last_pair_info = $line->get_imax_pair();
3256 sub fat_comma_to_comma {
3259 # We are changing '=>' to ',' and removing any trailing decimal count
3260 # because currently fat commas have a count and commas do not.
3261 # For example, we will change '=>2+{-3.2' into ',2+{-3'
3262 if ( $str =~ /^=>([^\.]*)/ ) { $str = ',' . $1 }
3266 sub get_line_token_info {
3268 # scan lines of tokens and return summary information about the range of
3269 # levels and patterns.
3272 # First scan to check monotonicity. Here is an example of several
3273 # lines which are monotonic. The = is the lowest level, and
3274 # the commas are all one level deeper. So this is not nonmonotonic.
3275 # $$d{"weeks"} = [ "w", "wk", "wks", "week", "weeks" ];
3276 # $$d{"days"} = [ "d", "day", "days" ];
3277 # $$d{"hours"} = [ "h", "hr", "hrs", "hour", "hours" ];
3279 my $all_monotonic = 1;
3280 for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) {
3281 my ($line) = $rlines->[$jj];
3282 my $rtokens = $line->get_rtokens();
3284 my $is_monotonic = 1;
3286 foreach my $tok ( @{$rtokens} ) {
3288 my ( $raw_tok, $lev, $tag, $tok_count ) =
3289 decode_alignment_token($tok);
3290 push @{ $all_token_info[$jj] },
3291 [ $raw_tok, $lev, $tag, $tok_count ];
3292 last if ( $tok eq '#' );
3293 if ( $i > 0 && $lev < $last_lev ) { $is_monotonic = 0 }
3296 if ( !$is_monotonic ) { $all_monotonic = 0 }
3299 my $rline_values = [];
3300 for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) {
3301 my ($line) = $rlines->[$jj];
3303 my $rtokens = $line->get_rtokens();
3305 my ( $lev_min, $lev_max );
3306 my $token_pattern_max = "";
3309 my $is_monotonic = 1;
3311 # find the index of the last token before the side comment
3312 my $imax = @{$rtokens} - 2;
3313 my $imax_true = $imax;
3315 # If the entire group is monotonic, and the line ends in a comma list,
3316 # walk it back to the first such comma. this will have the effect of
3317 # making all trailing ragged comma lists match in the prune tree
3318 # routine. these trailing comma lists can better be handled by later
3321 # Treat fat commas the same as commas here by converting them to
3322 # commas. This will improve the chance of aligning the leading parts
3325 my $tok_end = fat_comma_to_comma( $rtokens->[$imax] );
3326 if ( $all_monotonic && $tok_end =~ /^,/ ) {
3329 && fat_comma_to_comma( $rtokens->[$i] ) eq $tok_end )
3336 # make a first pass to find level range
3338 foreach my $tok ( @{$rtokens} ) {
3340 last if ( $i > $imax );
3341 last if ( $tok eq '#' );
3342 my ( $raw_tok, $lev, $tag, $tok_count ) =
3343 @{ $all_token_info[$jj]->[$i] };
3345 last if ( $tok eq '#' );
3346 $token_pattern_max .= $tok;
3348 if ( !defined($lev_min) ) {
3353 if ( $lev < $lev_min ) { $lev_min = $lev; }
3354 if ( $lev > $lev_max ) { $lev_max = $lev; }
3355 if ( $lev < $last_lev ) { $is_monotonic = 0 }
3361 my $rtoken_patterns = {};
3362 my $rtoken_indexes = {};
3363 my @levs = sort keys %saw_level;
3364 if ( !defined($lev_min) ) {
3368 $rtoken_patterns->{$lev_min} = "";
3369 $rtoken_indexes->{$lev_min} = [];
3373 elsif ( $lev_max == $lev_min ) {
3374 $rtoken_patterns->{$lev_max} = $token_pattern_max;
3375 $rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ];
3378 # handle multiple levels
3380 $rtoken_patterns->{$lev_max} = $token_pattern_max;
3381 $rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ];
3384 my $lev_top = pop @levs; # alread did max level
3386 foreach my $tok ( @{$rtokens} ) {
3388 last if ( $itok > $imax );
3389 my ( $raw_tok, $lev, $tag, $tok_count ) =
3390 @{ $all_token_info[$jj]->[$itok] };
3391 last if ( $raw_tok eq '#' );
3392 foreach my $lev_test (@levs) {
3393 next if ( $lev > $lev_test );
3394 $rtoken_patterns->{$lev_test} .= $tok;
3395 push @{ $rtoken_indexes->{$lev_test} }, $itok;
3398 push @levs, $lev_top;
3401 push @{$rline_values},
3403 $lev_min, $lev_max, $rtoken_patterns, \@levs,
3404 $rtoken_indexes, $is_monotonic, $imax_true, $imax,
3410 print "lev_min=$lev_min, lev_max=$lev_max, levels=(@levs)\n";
3411 foreach my $key ( sort keys %{$rtoken_patterns} ) {
3412 print "$key => $rtoken_patterns->{$key}\n";
3413 print "$key => @{$rtoken_indexes->{$key}}\n";
3416 } ## end loop over lines
3417 return ( $rline_values, $all_monotonic );
3420 sub prune_alignment_tree {
3422 my $jmax = @{$rlines} - 1;
3423 return unless $jmax > 0;
3425 # Vertical alignment in perltidy is done as an iterative process. The
3426 # starting point is to mark all possible alignment tokens ('=', ',', '=>',
3427 # etc) for vertical alignment. Then we have to delete all alignments
3428 # which, if actually made, would detract from overall alignment. This
3429 # is done in several phases of which this is one.
3431 # In this routine we look at the alignments of a group of lines as a
3432 # hierarchical tree. We will 'prune' the tree to limited depths if that
3433 # will improve overall alignment at the lower depths.
3434 # For each line we will be looking at its alignment patterns down to
3435 # different fixed depths. For each depth, we include all lower depths and
3436 # ignore all higher depths. We want to see if we can get alignment of a
3437 # larger group of lines if we ignore alignments at some lower depth.
3438 # Here is an # example:
3441 # [ '$var', sub { join $_, "bar" }, 0, "bar" ],
3442 # [ 'CONSTANT', sub { join "foo", "bar" }, 0, "bar" ],
3443 # [ 'CONSTANT', sub { join "foo", "bar", 3 }, 1, "barfoo3" ],
3444 # [ '$myvar', sub { my $var; join $var, "bar" }, 0, "bar" ],
3447 # In the above example, all lines have three commas at the lowest depth
3448 # (zero), so if there were no other alignements, these lines would all
3449 # align considering only the zero depth alignment token. But some lines
3450 # have additional comma alignments at the next depth, so we need to decide
3451 # if we should drop those to keep the top level alignments, or keep those
3452 # for some additional low level alignments at the expense losing some top
3453 # level alignments. In this case we will drop the deeper level commas to
3454 # keep the entire collection aligned. But in some cases the decision could
3457 # The tree for this example at the zero depth has one node containing
3458 # all four lines, since they are identical at zero level (three commas).
3459 # At depth one, there are three 'children' nodes, namely:
3460 # - lines 1 and 2, which have a single comma in the 'sub' at depth 1
3461 # - line 3, which has 2 commas at depth 1
3462 # - line4, which has a ';' and a ',' at depth 1
3463 # There are no deeper alignments in this example.
3464 # so the tree structure for this example is:
3466 # depth 0 depth 1 depth 2
3467 # [lines 1-4] -- [line 1-2] - (empty)
3468 # | [line 3] - (empty)
3469 # | [line 4] - (empty)
3471 # We can carry this to any depth, but it is not really useful to go below
3472 # depth 2. To cleanly stop there, we will consider depth 2 to contain all
3473 # alignments at depth >=2.
3475 use constant EXPLAIN_PRUNE => 0;
3477 ####################################################################
3478 # Prune Tree Step 1. Start by scanning the lines and collecting info
3479 ####################################################################
3481 # Note that the caller had this info but we have to redo this now because
3482 # alignment tokens may have been deleted.
3483 my ( $rline_values, $all_monotonic ) = get_line_token_info($rlines);
3485 # If all the lines have levels which increase monotonically from left to
3486 # right, then the sweep-left-to-right pass can do a better job of alignment
3487 # than pruning, and without deleting alignments.
3488 return if ($all_monotonic);
3490 # Contents of $rline_values
3492 # $lev_min, $lev_max, $rtoken_patterns, \@levs,
3493 # $rtoken_indexes, $is_monotonic, $imax_true, $imax,
3496 # We can work to any depth, but there is little advantage to working
3497 # to a a depth greater than 2
3500 # This arrays will hold the tree of alignment tokens at different depths
3504 # Tree nodes contain these values:
3505 # $match_tree[$depth] = [$jbeg, $jend, $n_parent, $level, $pattern,
3506 # $nc_beg_p, $nc_end_p, $rindexes];
3508 # $depth = 0,1,2 = index of depth of the match
3510 # $jbeg beginning index j of the range of lines in this match
3511 # $jend ending index j of the range of lines in this match
3512 # $n_parent = index of the containing group at $depth-1, if it exists
3513 # $level = actual level of code being matched in this group
3514 # $pattern = alignment pattern being matched
3515 # $nc_beg_p = first child
3516 # $nc_end_p = last child
3517 # $rindexes = ref to token indexes
3519 # the patterns and levels of the current group being formed at each depth
3520 my ( @token_patterns_current, @levels_current, @token_indexes_current );
3522 # the patterns and levels of the next line being tested at each depth
3523 my ( @token_patterns_next, @levels_next, @token_indexes_next );
3525 #########################################################
3526 # define a recursive worker subroutine for tree construction
3527 #########################################################
3529 # This is a recursive routine which is called if a match condition changes
3530 # at any depth when a new line is encountered. It ends the match node
3531 # which changed plus all deeper nodes attached to it.
3534 my ( $depth, $jl, $n_parent ) = @_;
3536 # $depth is the tree depth
3537 # $jl is the index of the line
3538 # $n_parent is index of the parent node of this node
3540 return if ( $depth > $MAX_DEPTH );
3542 # end any current group at this depth
3544 && defined( $match_tree[$depth] )
3545 && @{ $match_tree[$depth] }
3546 && defined( $levels_current[$depth] ) )
3548 $match_tree[$depth]->[-1]->[1] = $jl;
3551 # Define the index of the node we will create below
3553 if ( defined( $match_tree[$depth] ) ) {
3554 $ng_self = @{ $match_tree[$depth] };
3557 # end any next deeper child node(s)
3558 $end_node->( $depth + 1, $jl, $ng_self );
3560 # update the levels being matched
3561 $token_patterns_current[$depth] = $token_patterns_next[$depth];
3562 $token_indexes_current[$depth] = $token_indexes_next[$depth];
3563 $levels_current[$depth] = $levels_next[$depth];
3565 # Do not start a new group at this level if it is not being used
3566 if ( !defined( $levels_next[$depth] )
3568 && $levels_next[$depth] <= $levels_next[ $depth - 1 ] )
3573 # Create a node for the next group at this depth. We initially assume
3574 # that it will continue to $jmax, and correct that later if the node
3576 push @{ $match_tree[$depth] },
3578 $jl + 1, $jmax, $n_parent, $levels_current[$depth],
3579 $token_patterns_current[$depth],
3580 undef, undef, $token_indexes_current[$depth],
3584 }; ## end sub end_node
3586 ######################################################
3587 # Prune Tree Step 2. Loop to form the tree of matches.
3588 ######################################################
3589 for ( my $jp = 0 ; $jp <= $jmax ; $jp++ ) {
3591 # working with two adjacent line indexes, 'm'=minus, 'p'=plus
3594 # Pull out needed values for the next line
3595 my ( $lev_min, $lev_max, $rtoken_patterns, $rlevs, $rtoken_indexes,
3596 $is_monotonic, $imax_true, $imax )
3597 = @{ $rline_values->[$jp] };
3599 # Transfer levels and patterns for this line to the working arrays.
3600 # If the number of levels differs from our chosen MAX_DEPTH ...
3601 # if fewer than MAX_DEPTH: leave levels at missing depths undefined
3602 # if more than MAX_DEPTH: set the MAX_DEPTH level to be the maximum
3603 @levels_next = @{$rlevs}[ 0 .. $MAX_DEPTH ];
3604 if ( @{$rlevs} > $MAX_DEPTH ) {
3605 $levels_next[$MAX_DEPTH] = $rlevs->[-1];
3608 foreach (@levels_next) {
3609 $token_patterns_next[$depth] =
3610 defined($_) ? $rtoken_patterns->{$_} : undef;
3611 $token_indexes_next[$depth] =
3612 defined($_) ? $rtoken_indexes->{$_} : undef;
3616 # Look for a change in match groups...
3618 # Initialize on the first line
3621 $end_node->( 0, $jm, $n_parent );
3624 # End groups if a hard flag has been set
3625 elsif ( $rlines->[$jm]->get_end_group() ) {
3627 $end_node->( 0, $jm, $n_parent );
3630 # Continue at hanging side comment
3631 elsif ( $rlines->[$jp]->get_is_hanging_side_comment() ) {
3635 # Otherwise see if anything changed and update the tree if so
3637 foreach my $depth ( 0 .. $MAX_DEPTH ) {
3639 my $def_current = defined( $token_patterns_current[$depth] );
3640 my $def_next = defined( $token_patterns_next[$depth] );
3641 last unless ( $def_current || $def_next );
3644 || $token_patterns_current[$depth] ne
3645 $token_patterns_next[$depth] )
3648 if ( $depth > 0 && defined( $match_tree[ $depth - 1 ] ) ) {
3649 $n_parent = @{ $match_tree[ $depth - 1 ] } - 1;
3651 $end_node->( $depth, $jm, $n_parent );
3656 } ## end loop to form tree of matches
3658 ##########################################################
3659 # Prune Tree Step 3. Make links from parent to child nodes
3660 ##########################################################
3662 # It seemed cleaner to do this as a separate step rather than during tree
3663 # construction. The children nodes have links up to the parent node which
3664 # created them. Now make links in the opposite direction, so the parents
3665 # can find the children. We store the range of children nodes ($nc_beg,
3666 # $nc_end) of each parent with two additional indexes in the orignal array.
3667 # These will be undef if no children.
3668 for ( my $depth = $MAX_DEPTH ; $depth > 0 ; $depth-- ) {
3669 next unless defined( $match_tree[$depth] );
3670 my $nc_max = @{ $match_tree[$depth] } - 1;
3672 foreach my $nc ( 0 .. $nc_max ) {
3673 my $np = $match_tree[$depth]->[$nc]->[2];
3674 if ( !defined($np) ) {
3677 #print STDERR "lost child $np at depth $depth\n";
3680 if ( !defined($np_now) || $np != $np_now ) {
3682 $match_tree[ $depth - 1 ]->[$np]->[5] = $nc;
3684 $match_tree[ $depth - 1 ]->[$np]->[6] = $nc;
3686 } ## end loop to make links down to the child nodes
3688 EXPLAIN_PRUNE > 0 && do {
3689 print "Tree complete. Found these groups:\n";
3690 foreach my $depth ( 0 .. $MAX_DEPTH ) {
3691 Dump_tree_groups( \@{ $match_tree[$depth] }, "depth=$depth" );
3695 #######################################################
3696 # Prune Tree Step 4. Make a list of nodes to be deleted
3697 #######################################################
3699 # list of lines with tokens to be deleted:
3700 # [$jbeg, $jend, $level_keep]
3701 # $jbeg..$jend is the range of line indexes,
3702 # $level_keep is the minimum level to keep
3705 # Groups with ending comma lists and their range of sizes:
3706 # $ragged_comma_group{$id} = [ imax_group_min, imax_group_max ]
3707 my %ragged_comma_group;
3709 # Define a threshold line count for forcing a break
3710 my $nlines_break = 3;
3712 # We work with a list of nodes to visit at the next deeper depth.
3714 if ( defined( $match_tree[0] ) ) {
3715 @todo_list = ( 0 .. @{ $match_tree[0] } - 1 );
3718 for ( my $depth = 0 ; $depth <= $MAX_DEPTH ; $depth++ ) {
3719 last unless (@todo_list);
3721 foreach my $np (@todo_list) {
3722 my ( $jbeg_p, $jend_p, $np_p, $lev_p, $pat_p, $nc_beg_p, $nc_end_p,
3724 = @{ $match_tree[$depth]->[$np] };
3725 my $nlines_p = $jend_p - $jbeg_p + 1;
3727 # nothing to do if no children
3728 next unless defined($nc_beg_p);
3730 # Define the number of lines to either keep or delete a child node.
3731 # This is the key decision we have to make. We want to delete
3732 # short runs of matched lines, and keep long runs. It seems easier
3733 # for the eye to follow breaks in monotonic level changes than
3734 # non-monotonic level changes. For example, the following looks
3735 # best if we delete the lower level alignments:
3738 # [ ["foo"], ["bar"] ] ~~ [ qr/o/, qr/a/ ];
3739 # [ qr/o/, qr/a/ ] ~~ [ ["foo"], ["bar"] ];
3740 # [ "foo", "bar" ] ~~ [ qr/o/, qr/a/ ];
3741 # [ qr/o/, qr/a/ ] ~~ [ "foo", "bar" ];
3744 # So we will use two thresholds.
3745 my $nmin_mono = $depth + 2;
3746 my $nmin_non_mono = $depth + 6;
3747 if ( $nmin_mono > $nlines_p - 1 ) {
3748 $nmin_mono = $nlines_p - 1;
3750 if ( $nmin_non_mono > $nlines_p - 1 ) {
3751 $nmin_non_mono = $nlines_p - 1;
3754 # loop to keep or delete each child node
3755 foreach my $nc ( $nc_beg_p .. $nc_end_p ) {
3756 my ( $jbeg_c, $jend_c, $np_c, $lev_c, $pat_c, $nc_beg_c,
3758 = @{ $match_tree[ $depth + 1 ]->[$nc] };
3759 my $nlines_c = $jend_c - $jbeg_c + 1;
3760 my $is_monotonic = $rline_values->[$jbeg_c]->[5];
3761 my $nmin = $is_monotonic ? $nmin_mono : $nmin_non_mono;
3762 if ( $nlines_c < $nmin ) {
3763 ##print "deleting child, nlines=$nlines_c, nmin=$nmin\n";
3764 push @delete_list, [ $jbeg_c, $jend_c, $lev_p ];
3767 ##print "keeping child, nlines=$nlines_c, nmin=$nmin\n";
3768 push @todo_next, $nc;
3772 @todo_list = @todo_next;
3773 } ## end loop to mark nodes to delete
3775 #############################################################
3776 # Prune Tree Step 5. Loop to delete selected alignment tokens
3777 #############################################################
3778 foreach my $item (@delete_list) {
3779 my ( $jbeg, $jend, $level_keep ) = @{$item};
3780 foreach my $jj ( $jbeg .. $jend ) {
3781 my $line = $rlines->[$jj];
3783 my $rtokens = $line->get_rtokens();
3784 my $imax = @{$rtokens} - 2;
3785 for ( my $i = 0 ; $i <= $imax ; $i++ ) {
3786 my $tok = $rtokens->[$i];
3787 my ( $raw_tok, $lev, $tag, $tok_count ) =
3788 decode_alignment_token($tok);
3789 if ( $lev > $level_keep ) {
3794 delete_selected_tokens( $line, \@idel );
3797 } ## end loop to delete selected alignment tokens
3800 } ## end sub prune_alignment_tree
3802 sub Dump_tree_groups {
3803 my ( $rgroup, $msg ) = @_;
3806 foreach my $item ( @{$rgroup} ) {
3808 foreach (@fix) { $_ = "undef" unless defined $_; }
3815 { ## closure for sub is_marginal_match
3819 my %is_good_alignment;
3821 # This test did not give sufficiently better results to use as an update,
3822 # but the flag is worth keeping as a starting point for future testing.
3823 use constant TEST_MARGINAL_EQ_ALIGNMENT => 0;
3830 @is_if_or{@q} = (1) x scalar(@q);
3833 = **= += *= &= <<= &&=
3834 -= /= |= >>= ||= //=
3838 @is_assignment{@q} = (1) x scalar(@q);
3840 # Vertically aligning on certain "good" tokens is usually okay
3841 # so we can be less restrictive in marginal cases.
3842 @q = qw( { ? => = );
3844 @is_good_alignment{@q} = (1) x scalar(@q);
3847 sub is_marginal_match {
3849 my ( $line_0, $line_1, $group_level, $imax_align, $imax_prev ) = @_;
3851 # Decide if we should undo some or all of the common alignments of a
3852 # group of just two lines.
3855 # $line_0 and $line_1 - the two lines
3856 # $group_level = the indentation level of the group being processed
3857 # $imax_align = the maximum index of the common alignment tokens
3859 # $imax_prev = the maximum index of the common alignment tokens
3860 # with the line before $line_0 (=-1 of does not exist)
3863 # $is_marginal = true if the two lines should NOT be fully aligned
3864 # = false if the two lines can remain fully aligned
3865 # $imax_align = the index of the highest alignment token shared by
3866 # these two lines to keep if the match is marginal.
3868 # When we have an alignment group of just two lines like this, we are
3869 # working in the twilight zone of what looks good and what looks bad.
3870 # This routine is a collection of rules which work have been found to
3871 # work fairly well, but it will need to be updated from time to time.
3873 my $is_marginal = 0;
3875 # always keep alignments of a terminal else or ternary
3876 goto RETURN if ( defined( $line_1->get_j_terminal_match() ) );
3878 # always align lists
3879 my $group_list_type = $line_0->get_list_type();
3880 goto RETURN if ($group_list_type);
3882 # always align hanging side comments
3883 my $is_hanging_side_comment = $line_1->get_is_hanging_side_comment();
3884 goto RETURN if ($is_hanging_side_comment);
3886 my $jmax_0 = $line_0->get_jmax();
3887 my $jmax_1 = $line_1->get_jmax();
3888 my $rtokens_1 = $line_1->get_rtokens();
3889 my $rtokens_0 = $line_0->get_rtokens();
3890 my $rfield_lengths_0 = $line_0->get_rfield_lengths();
3891 my $rfield_lengths_1 = $line_1->get_rfield_lengths();
3892 my $rpatterns_0 = $line_0->get_rpatterns();
3893 my $rpatterns_1 = $line_1->get_rpatterns();
3894 my $imax_next = $line_1->get_imax_pair();
3896 # We will scan the alignment tokens and set a flag '$is_marginal' if
3897 # it seems that the an alignment would look bad.
3899 my $saw_good_alignment = 0;
3900 my $saw_if_or; # if we saw an 'if' or 'or' at group level
3901 my $raw_tokb = ""; # first token seen at group level
3903 my $line_ending_fat_comma; # is last token just a '=>' ?
3907 for ( my $j = 0 ; $j < $jmax_1 - 1 ; $j++ ) {
3908 my ( $raw_tok, $lev, $tag, $tok_count ) =
3909 decode_alignment_token( $rtokens_1->[$j] );
3910 if ( $raw_tok && $lev == $group_level ) {
3911 if ( !$raw_tokb ) { $raw_tokb = $raw_tok }
3912 $saw_if_or ||= $is_if_or{$raw_tok};
3915 # When the first of the two lines ends in a bare '=>' this will
3916 # probably be marginal match. (For a bare =>, the next field length
3917 # will be 2 or 3, depending on side comment)
3918 $line_ending_fat_comma =
3921 && $rfield_lengths_0->[ $j + 1 ] <= 3;
3923 my $pad = $rfield_lengths_1->[$j] - $rfield_lengths_0->[$j];
3925 $pad += $line_1->get_leading_space_count() -
3926 $line_0->get_leading_space_count();
3928 # Remember the pad at a leading equals
3929 if ( $raw_tok eq '=' && $lev == $group_level ) {
3932 0.5 * ( $rfield_lengths_1->[0] + $rfield_lengths_0->[0] );
3933 $j0_max_pad = 4 if ( $j0_max_pad < 4 );
3937 if ( $pad < 0 ) { $pad = -$pad }
3938 if ( $pad > $max_pad ) { $max_pad = $pad }
3939 if ( $is_good_alignment{$raw_tok} && !$line_ending_fat_comma ) {
3940 $saw_good_alignment = 1;
3943 $jfirst_bad = $j unless defined($jfirst_bad);
3945 if ( $rpatterns_0->[$j] ne $rpatterns_1->[$j] ) {
3947 # Flag this as a marginal match since patterns differ.
3948 # Normally, we will not allow just two lines to match if
3949 # marginal. But we can allow matching in some specific cases.
3951 $jfirst_bad = $j if ( !defined($jfirst_bad) );
3952 $is_marginal = 1 if ( $is_marginal == 0 );
3953 if ( $raw_tok eq '=' ) {
3955 # Here is an example of a marginal match:
3957 # $op = compile_bblock($op);
3958 # The left tokens are both identifiers, but
3959 # one accesses a hash and the other doesn't.
3960 # We'll let this be a tentative match and undo
3961 # it later if we don't find more than 2 lines
3968 $is_marginal = 1 if ( $is_marginal == 0 && $line_ending_fat_comma );
3970 # Turn off the "marginal match" flag in some cases...
3971 # A "marginal match" occurs when the alignment tokens agree
3972 # but there are differences in the other tokens (patterns).
3973 # If we leave the marginal match flag set, then the rule is that we
3974 # will align only if there are more than two lines in the group.
3975 # We will turn of the flag if we almost have a match
3976 # and either we have seen a good alignment token or we
3977 # just need a small pad (2 spaces) to fit. These rules are
3978 # the result of experimentation. Tokens which misaligned by just
3979 # one or two characters are annoying. On the other hand,
3980 # large gaps to less important alignment tokens are also annoying.
3981 if ( $is_marginal == 1
3982 && ( $saw_good_alignment || $max_pad < 3 ) )
3987 # We will use the line endings to help decide on alignments...
3988 # See if the lines end with semicolons...
3991 if ( $jmax_0 < 1 || $jmax_1 < 1 ) {
3996 my $pat0 = $rpatterns_0->[ $jmax_0 - 1 ];
3997 my $pat1 = $rpatterns_1->[ $jmax_1 - 1 ];
3998 $sc_term0 = $pat0 =~ /;b?$/;
3999 $sc_term1 = $pat1 =~ /;b?$/;
4002 if ( !$is_marginal && !$sc_term0 ) {
4004 # First line of assignment should be semicolon terminated.
4005 # For example, do not align here:
4006 # $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4007 # $$href{-NUM_DIRS} = 0;
4008 if ( $is_assignment{$raw_tokb} ) {
4013 # Try to avoid some undesirable alignments of opening tokens
4014 # for example, the space between grep and { here:
4015 # return map { ( $_ => $_ ) }
4016 # grep { /$handles/ } $self->_get_delegate_method_list;
4018 ( $raw_tokb eq '(' || $raw_tokb eq '{' )
4020 && $sc_term0 ne $sc_term1;
4022 ########################################
4023 # return unless this is a marginal match
4024 ########################################
4025 goto RETURN if ( !$is_marginal );
4027 # Undo the marginal match flag in certain cases,
4029 # Two lines with a leading equals-like operator are allowed to
4030 # align if the patterns to the left of the equals are the same.
4031 # For example the following two lines are a marginal match but have
4032 # the same left side patterns, so we will align the equals.
4033 # my $orig = my $format = "^<<<<< ~~\n";
4035 # But these have a different left pattern so they will not be
4038 # $self->{'leftovers'} .= "<bx-seq:seq" . $';
4040 # First line semicolon terminated but second not, usually ok:
4041 # my $want = "'ab', 'a', 'b'";
4042 # my $got = join( ", ",
4043 # map { defined($_) ? "'$_'" : "undef" }
4045 # First line not semicolon terminated, Not OK to match:
4046 # $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4047 # $$href{-NUM_DIRS} = 0;
4048 my $pat0 = $rpatterns_0->[0];
4049 my $pat1 = $rpatterns_1->[0];
4051 ##########################################################
4052 # Turn off the marginal flag for some types of assignments
4053 ##########################################################
4054 if ( $is_assignment{$raw_tokb} ) {
4056 # undo marginal flag if first line is semicolon terminated
4057 # and leading patters match
4058 if ($sc_term0) { # && $sc_term1) {
4059 $is_marginal = $pat0 ne $pat1;
4062 elsif ( $raw_tokb eq '=>' ) {
4064 # undo marginal flag if patterns match
4065 $is_marginal = $pat0 ne $pat1 || $line_ending_fat_comma;
4067 elsif ( $raw_tokb eq '=~' ) {
4069 # undo marginal flag if both lines are semicolon terminated
4070 # and leading patters match
4071 if ( $sc_term1 && $sc_term0 ) {
4072 $is_marginal = $pat0 ne $pat1;
4076 ######################################################
4077 # Turn off the marginal flag if we saw an 'if' or 'or'
4078 ######################################################
4080 # A trailing 'if' and 'or' often gives a good alignment
4081 # For example, we can align these:
4082 # return -1 if $_[0] =~ m/^CHAPT|APPENDIX/;
4083 # return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/;
4086 # $d_in_m[2] = 29 if ( &Date_LeapYear($y) );
4087 # $d = $d_in_m[$m] if ( $d > $d_in_m[$m] );
4091 # undo marginal flag if both lines are semicolon terminated
4092 if ( $sc_term0 && $sc_term1 ) {
4097 # For a marginal match, only keep matches before the first 'bad' match
4099 && defined($jfirst_bad)
4100 && $imax_align > $jfirst_bad - 1 )
4102 $imax_align = $jfirst_bad - 1;
4105 ###########################################################
4106 # Allow sweep to match lines with leading '=' in some cases
4107 ###########################################################
4108 if ( $imax_align < 0 && defined($j0_eq_pad) ) {
4112 # If there is a following line with leading equals, or
4113 # preceding line with leading equals, then let the sweep align
4114 # them without restriction. For example, the first two lines
4115 # here are a marginal match, but they are followed by a line
4116 # with leading equals, so the sweep-lr logic can align all of
4119 # $date[1] = $month_to_num{ $date[1] }; # <--line_0
4120 # @xdate = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4121 # $day = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4122 # $time = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4124 # Likewise, if we reverse the two pairs we want the same result
4126 # $day = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4127 # $time = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4128 # $date[1] = $month_to_num{ $date[1] }; # <--line_0
4129 # @xdate = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4134 || TEST_MARGINAL_EQ_ALIGNMENT
4136 && $j0_eq_pad >= -$j0_max_pad
4137 && $j0_eq_pad <= $j0_max_pad
4141 # But do not do this if there is a comma before the '='.
4142 # For example, the first two lines below have commas and
4143 # therefore are not allowed to align with lines 3 & 4:
4145 # my ( $x, $y ) = $self->Size(); #<--line_0
4146 # my ( $left, $top, $right, $bottom ) = $self->Window(); #<--l_1
4147 # my $vx = $right - $left;
4148 # my $vy = $bottom - $top;
4150 if ( $rpatterns_0->[0] !~ /,/ && $rpatterns_1->[0] !~ /,/ ) {
4157 return ( $is_marginal, $imax_align );
4161 sub get_extra_leading_spaces {
4163 my ( $rlines, $rgroups ) = @_;
4165 #----------------------------------------------------------
4166 # Define any extra indentation space (for the -lp option).
4168 # If a list has side comments, sub scan_list must dump the
4169 # list before it sees everything. When this happens, it sets
4170 # the indentation to the standard scheme, but notes how
4171 # many spaces it would have liked to use. We may be able
4172 # to recover that space here in the event that all of the
4173 # lines of a list are back together again.
4174 #----------------------------------------------------------
4176 return 0 unless ( @{$rlines} && @{$rgroups} );
4178 my $object = $rlines->[0]->get_indentation();
4179 return 0 unless ( ref($object) );
4180 my $extra_leading_spaces = 0;
4181 my $extra_indentation_spaces_wanted = get_recoverable_spaces($object);
4182 return ($extra_leading_spaces) unless ($extra_indentation_spaces_wanted);
4184 my $min_spaces = $extra_indentation_spaces_wanted;
4185 if ( $min_spaces > 0 ) { $min_spaces = 0 }
4187 # loop over all groups
4189 my $ngroups = @{$rgroups};
4190 foreach my $item ( @{$rgroups} ) {
4192 my ( $jbeg, $jend ) = @{$item};
4193 foreach my $j ( $jbeg .. $jend ) {
4194 next if ( $j == 0 );
4196 # all indentation objects must be the same
4197 if ( $object != $rlines->[$j]->get_indentation() ) {
4202 # find the maximum space without exceeding the line length for this group
4203 my $avail = $rlines->[$jbeg]->get_available_space_on_right();
4205 ( $avail > $extra_indentation_spaces_wanted )
4206 ? $extra_indentation_spaces_wanted
4209 #########################################################
4210 # Note: min spaces can be negative; for example with -gnu
4212 # do { 1; !!(my $x = bless []); }
4214 #########################################################
4215 # The following rule is needed to match older formatting:
4216 # For multiple groups, we will keep spaces non-negative.
4217 # For a single group, we will allow a negative space.
4218 if ( $ngroups > 1 && $spaces < 0 ) { $spaces = 0 }
4220 # update the minimum spacing
4221 if ( $ng == 0 || $spaces < $extra_leading_spaces ) {
4222 $extra_leading_spaces = $spaces;
4226 # update the indentation object because with -icp the terminal
4227 # ');' will use the same adjustment.
4228 $object->permanently_decrease_available_spaces( -$extra_leading_spaces );
4229 return $extra_leading_spaces;
4232 sub forget_side_comment {
4234 $self->[_last_side_comment_column_] = 0;
4238 sub is_good_side_comment_column {
4239 my ( $self, $line, $line_number, $level, $num5 ) = @_;
4241 # Upon encountering the first side comment of a group, decide if
4242 # a previous side comment should be forgotten. This involves
4243 # checking several rules.
4245 # Return true to keep old comment location
4246 # Return false to forget old comment location
4248 my $rfields = $line->get_rfields();
4249 my $is_hanging_side_comment = $line->get_is_hanging_side_comment();
4251 # RULE1: Never forget comment before a hanging side comment
4252 goto KEEP if ($is_hanging_side_comment);
4254 # RULE2: Forget a side comment after a short line difference,
4255 # where 'short line difference' is computed from a formula.
4256 # Using a smooth formula helps minimize sudden large changes.
4257 my $line_diff = $line_number - $self->[_last_side_comment_line_number_];
4258 my $alev_diff = abs( $level - $self->[_last_side_comment_level_] );
4260 # '$num5' is the number of comments in the first 5 lines after the first
4261 # comment. It is needed to keep a compact group of side comments from
4262 # being influenced by a more distant side comment.
4263 $num5 = 1 unless ($num5);
4267 # $adiff $num5 $short_diff
4279 my $short_diff = SC_LONG_LINE_DIFF / ( 1 + $alev_diff * $num5 );
4282 if ( $line_diff > $short_diff );
4284 # RULE3: Forget a side comment if this line is at lower level and
4286 my $last_sc_level = $self->[_last_side_comment_level_];
4288 if ( $level < $last_sc_level
4289 && $is_closing_block_type{ substr( $rfields->[0], 0, 1 ) } );
4291 # RULE 4: Forget the last side comment if this comment might join a cached
4293 if ( my $cached_line_type = get_cached_line_type() ) {
4295 # ... otherwise side comment alignment will get messed up.
4296 # For example, in the following test script
4297 # with using 'perltidy -sct -act=2', the last comment would try to
4298 # align with the previous and then be in the wrong column when
4299 # the lines are combined:
4302 # [0, 1, 2], [3, 4, 5], [6, 7, 8], # rows
4303 # [0, 3, 6], [1, 4, 7], [2, 5, 8], # columns
4304 # [0, 4, 8], [2, 4, 6]
4307 if ( $cached_line_type == 2 || $cached_line_type == 4 );
4310 # Otherwise, keep it alive
4320 sub align_side_comments {
4322 my ( $self, $rlines, $rgroups ) = @_;
4324 # Align any side comments in this batch of lines
4327 # $rlines - the lines
4328 # $rgroups - the partition of the lines into groups
4330 # We will be working group-by-group because all side comments
4331 # (real or fake) in each group are already aligned. So we just have
4332 # to make alignments between groups wherever possible.
4334 # An unusual aspect is that within each group we have aligned both real
4335 # and fake side comments. This has the consequence that the lengths of
4336 # long lines without real side comments can cause 'push' all side comments
4337 # to the right. This seems unusual, but testing with and without this
4338 # feature shows that it is usually better this way. Othewise, side
4339 # comments can be hidden between long lines without side comments and
4340 # thus be harder to read.
4342 my $group_level = $self->[_group_level_];
4343 my $continuing_sc_flow = $self->[_last_side_comment_length_] > 0
4344 && $group_level == $self->[_last_level_written_];
4346 # Find groups with side comments, and remember the first nonblank comment
4350 foreach my $item ( @{$rgroups} ) {
4352 my ( $jbeg, $jend ) = @{$item};
4353 foreach my $j ( $jbeg .. $jend ) {
4354 my $line = $rlines->[$j];
4355 my $jmax = $line->get_jmax();
4356 if ( $line->get_rfield_lengths()->[$jmax] ) {
4358 # this group has a line with a side comment
4360 if ( !defined($j_sc_beg) ) {
4368 # done if no groups with side comments
4369 return unless @todo;
4371 # Count $num5 = number of comments in the 5 lines after the first comment
4372 # This is an important factor in a decision formula
4374 for ( my $jj = $j_sc_beg + 1 ; $jj < @{$rlines} ; $jj++ ) {
4375 my $ldiff = $jj - $j_sc_beg;
4376 last if ( $ldiff > 5 );
4377 my $line = $rlines->[$jj];
4378 my $jmax = $line->get_jmax();
4379 my $sc_len = $line->get_rfield_lengths()->[$jmax];
4380 next unless ($sc_len);
4384 # Forget the old side comment location if necessary
4385 my $line = $rlines->[$j_sc_beg];
4387 $j_sc_beg + $self->[_file_writer_object_]->get_output_line_number();
4389 $self->is_good_side_comment_column( $line, $lnum, $group_level, $num5 );
4390 my $last_side_comment_column =
4391 $keep_it ? $self->[_last_side_comment_column_] : 0;
4393 # If there are multiple groups we will do two passes
4394 # so that we can find a common alignment for all groups.
4395 my $MAX_PASS = @todo > 1 ? 2 : 1;
4398 my $max_comment_column = $last_side_comment_column;
4399 for ( my $PASS = 1 ; $PASS <= $MAX_PASS ; $PASS++ ) {
4401 # If there are two passes, then on the last pass make the old column
4402 # equal to the largest of the group. This will result in the comments
4403 # being aligned if possible.
4404 if ( $PASS == $MAX_PASS ) {
4405 $last_side_comment_column = $max_comment_column;
4408 # Loop over the groups with side comments
4410 foreach my $ng (@todo) {
4411 my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
4413 # Note that since all lines in a group have common alignments, we
4414 # just have to work on one of the lines (the first line).
4415 my $line = $rlines->[$jbeg];
4416 my $jmax = $line->get_jmax();
4417 my $is_hanging_side_comment = $line->get_is_hanging_side_comment();
4419 if ( $PASS < $MAX_PASS && $is_hanging_side_comment );
4421 # the maximum space without exceeding the line length:
4422 my $avail = $line->get_available_space_on_right();
4424 # try to use the previous comment column
4425 my $side_comment_column = $line->get_column( $jmax - 1 );
4426 my $move = $last_side_comment_column - $side_comment_column;
4428 # Remember the maximum possible column of the first line with
4430 if ( !defined($column_limit) ) {
4431 $column_limit = $side_comment_column + $avail;
4434 next if ( $jmax <= 0 );
4436 # but if this doesn't work, give up and use the minimum space
4437 my $min_move = $self->[_rOpts_minimum_space_to_comment_] - 1;
4438 if ( $move > $avail ) {
4442 # but we want some minimum space to the comment
4445 && $continuing_sc_flow )
4450 # remove constraints on hanging side comments
4451 if ($is_hanging_side_comment) { $min_move = 0 }
4453 if ( $move < $min_move ) {
4457 # don't exceed the available space
4458 if ( $move > $avail ) { $move = $avail }
4460 # We can only increase space, never decrease.
4461 if ( $move < 0 ) { $move = 0 }
4463 # Discover the largest column on the preliminary pass
4464 if ( $PASS < $MAX_PASS ) {
4465 my $col = $line->get_column( $jmax - 1 ) + $move;
4467 # but ignore columns too large for the starting line
4468 if ( $col > $max_comment_column && $col < $column_limit ) {
4469 $max_comment_column = $col;
4473 # Make the changes on the final pass
4475 $line->increase_field_width( $jmax - 1, $move );
4477 # remember this column for the next group
4478 $last_side_comment_column = $line->get_column( $jmax - 1 );
4480 } ## end loop over groups
4481 } ## end loop over passes
4483 # Find the last side comment
4485 my $ng_last = $todo[-1];
4486 my ( $jbeg, $jend ) = @{ $rgroups->[$ng_last] };
4487 for ( my $jj = $jend ; $jj >= $jbeg ; $jj-- ) {
4488 my $line = $rlines->[$jj];
4489 my $jmax = $line->get_jmax();
4490 if ( $line->get_rfield_lengths()->[$jmax] ) {
4496 # Save final side comment info for possible use by the next batch
4497 if ( defined($j_sc_last) ) {
4499 $self->[_file_writer_object_]->get_output_line_number() + $j_sc_last;
4500 $self->[_last_side_comment_column_] = $last_side_comment_column;
4501 $self->[_last_side_comment_line_number_] = $line_number;
4502 $self->[_last_side_comment_level_] = $group_level;
4507 ###############################
4508 # CODE SECTION 6: Output Step A
4509 ###############################
4511 sub valign_output_step_A {
4513 ###############################################################
4514 # This is Step A in writing vertically aligned lines.
4515 # The line is prepared according to the alignments which have
4516 # been found. Then it is shipped to the next step.
4517 ###############################################################
4519 my ( $self, $rinput_hash ) = @_;
4521 my $line = $rinput_hash->{line};
4522 my $min_ci_gap = $rinput_hash->{min_ci_gap};
4523 my $do_not_align = $rinput_hash->{do_not_align};
4524 my $group_leader_length = $rinput_hash->{group_leader_length};
4525 my $extra_leading_spaces = $rinput_hash->{extra_leading_spaces};
4526 my $level = $rinput_hash->{level};
4528 my $rfields = $line->get_rfields();
4529 my $rfield_lengths = $line->get_rfield_lengths();
4530 my $leading_space_count = $line->get_leading_space_count();
4531 my $outdent_long_lines = $line->get_outdent_long_lines();
4532 my $maximum_field_index = $line->get_jmax();
4533 my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
4534 my $Kend = $line->get_Kend();
4535 my $level_end = $line->get_level_end();
4537 # add any extra spaces
4538 if ( $leading_space_count > $group_leader_length ) {
4539 $leading_space_count += $min_ci_gap;
4542 my $str = $rfields->[0];
4543 my $str_len = $rfield_lengths->[0];
4545 # loop to concatenate all fields of this line and needed padding
4546 my $total_pad_count = 0;
4547 for my $j ( 1 .. $maximum_field_index ) {
4549 # skip zero-length side comments
4552 ( $j == $maximum_field_index )
4553 && ( !defined( $rfields->[$j] )
4554 || ( $rfield_lengths->[$j] == 0 ) )
4557 # compute spaces of padding before this field
4558 my $col = $line->get_column( $j - 1 );
4559 my $pad = $col - ( $str_len + $leading_space_count );
4561 if ($do_not_align) {
4563 ( $j < $maximum_field_index )
4565 : $self->[_rOpts_minimum_space_to_comment_] - 1;
4568 # if the -fpsc flag is set, move the side comment to the selected
4569 # column if and only if it is possible, ignoring constraints on
4570 # line length and minimum space to comment
4571 if ( $self->[_rOpts_fixed_position_side_comment_]
4572 && $j == $maximum_field_index )
4575 $pad + $self->[_rOpts_fixed_position_side_comment_] - $col - 1;
4576 if ( $newpad >= 0 ) { $pad = $newpad; }
4579 # accumulate the padding
4580 if ( $pad > 0 ) { $total_pad_count += $pad; }
4582 # only add padding when we have a finite field;
4583 # this avoids extra terminal spaces if we have empty fields
4584 if ( $rfield_lengths->[$j] > 0 ) {
4585 $str .= ' ' x $total_pad_count;
4586 $str_len += $total_pad_count;
4587 $total_pad_count = 0;
4588 $str .= $rfields->[$j];
4589 $str_len += $rfield_lengths->[$j];
4592 $total_pad_count = 0;
4596 my $side_comment_length = $rfield_lengths->[$maximum_field_index];
4598 # ship this line off
4599 $self->valign_output_step_B(
4601 leading_space_count => $leading_space_count + $extra_leading_spaces,
4603 line_length => $str_len,
4604 side_comment_length => $side_comment_length,
4605 outdent_long_lines => $outdent_long_lines,
4606 rvertical_tightness_flags => $rvertical_tightness_flags,
4608 level_end => $level_end,
4615 sub combine_fields {
4617 # We have a group of two lines for which we do not want to align tokens
4618 # between index $imax_align and the side comment. So we will delete fields
4619 # between $imax_align and the side comment. Alignments have already
4620 # been set so we have to adjust them.
4622 my ( $line_0, $line_1, $imax_align ) = @_;
4624 if ( !defined($imax_align) ) { $imax_align = -1 }
4626 # First delete the unwanted tokens
4627 my $jmax_old = $line_0->get_jmax();
4628 my @old_alignments = $line_0->get_alignments();
4629 my @idel = ( $imax_align + 1 .. $jmax_old - 2 );
4631 return unless (@idel);
4633 foreach my $line ( $line_0, $line_1 ) {
4634 delete_selected_tokens( $line, \@idel );
4637 # Now adjust the alignments. Note that the side comment alignment
4638 # is always at jmax-1, and there is an ending alignment at jmax.
4640 if ( $imax_align >= 0 ) {
4641 @new_alignments[ 0 .. $imax_align ] =
4642 @old_alignments[ 0 .. $imax_align ];
4645 my $jmax_new = $line_0->get_jmax();
4647 $new_alignments[ $jmax_new - 1 ] = $old_alignments[ $jmax_old - 1 ];
4648 $new_alignments[$jmax_new] = $old_alignments[$jmax_old];
4649 $line_0->set_alignments(@new_alignments);
4650 $line_1->set_alignments(@new_alignments);
4654 sub get_output_line_number {
4656 # The output line number reported to a caller =
4657 # the number of items still in the buffer +
4658 # the number of items written.
4659 return $_[0]->group_line_count() +
4660 $_[0]->[_file_writer_object_]->get_output_line_number();
4663 ###############################
4664 # CODE SECTION 7: Output Step B
4665 ###############################
4667 { ## closure for sub valign_output_step_B
4669 # These are values for a cache used by valign_output_step_B.
4670 my $cached_line_text;
4671 my $cached_line_text_length;
4672 my $cached_line_type;
4673 my $cached_line_flag;
4675 my $cached_line_valid;
4676 my $cached_line_leading_space_count;
4677 my $cached_seqno_string;
4678 my $cached_line_Kend;
4680 my $last_nonblank_seqno_string;
4682 sub get_seqno_string {
4683 return $seqno_string;
4686 sub get_last_nonblank_seqno_string {
4687 return $last_nonblank_seqno_string;
4690 sub set_last_nonblank_seqno_string {
4692 $last_nonblank_seqno_string = $val;
4696 sub get_cached_line_flag {
4697 return $cached_line_flag;
4700 sub get_cached_line_type {
4701 return $cached_line_type;
4704 sub set_cached_line_valid {
4706 $cached_line_valid = $val;
4710 sub get_cached_seqno {
4711 return $cached_seqno;
4714 sub initialize_step_B_cache {
4716 # valign_output_step_B cache:
4717 $cached_line_text = "";
4718 $cached_line_text_length = 0;
4719 $cached_line_type = 0;
4720 $cached_line_flag = 0;
4722 $cached_line_valid = 0;
4723 $cached_line_leading_space_count = 0;
4724 $cached_seqno_string = "";
4725 $cached_line_Kend = undef;
4727 # These vars hold a string of sequence numbers joined together used by
4730 $last_nonblank_seqno_string = "";
4736 if ($cached_line_type) {
4737 $seqno_string = $cached_seqno_string;
4738 $self->valign_output_step_C(
4740 $cached_line_leading_space_count,
4741 $self->[_last_level_written_],
4744 $cached_line_type = 0;
4745 $cached_line_text = "";
4746 $cached_line_text_length = 0;
4747 $cached_seqno_string = "";
4748 $cached_line_Kend = undef;
4753 sub valign_output_step_B {
4755 ###############################################################
4756 # This is Step B in writing vertically aligned lines.
4757 # Vertical tightness is applied according to preset flags.
4758 # In particular this routine handles stacking of opening
4759 # and closing tokens.
4760 ###############################################################
4762 my ( $self, $rinput ) = @_;
4764 my $leading_space_count = $rinput->{leading_space_count};
4765 my $str = $rinput->{line};
4766 my $str_length = $rinput->{line_length};
4767 my $side_comment_length = $rinput->{side_comment_length};
4768 my $outdent_long_lines = $rinput->{outdent_long_lines};
4769 my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
4770 my $level = $rinput->{level};
4771 my $level_end = $rinput->{level_end};
4772 my $Kend = $rinput->{Kend};
4774 my $last_level_written = $self->[_last_level_written_];
4776 # Useful -gcs test cases for wide characters are
4777 # perl527/(method.t.2, reg_mesg.t, mime-header.t)
4779 # handle outdenting of long lines:
4780 my $is_outdented_line;
4781 if ($outdent_long_lines) {
4784 $side_comment_length +
4785 $leading_space_count -
4786 $self->maximum_line_length_for_level($level);
4787 if ( $excess > 0 ) {
4788 $leading_space_count = 0;
4789 my $file_writer_object = $self->[_file_writer_object_];
4790 my $last_outdented_line_at =
4791 $file_writer_object->get_output_line_number();
4792 $self->[_last_outdented_line_at_] = $last_outdented_line_at;
4794 my $outdented_line_count = $self->[_outdented_line_count_];
4795 unless ($outdented_line_count) {
4796 $self->[_first_outdented_line_at_] =
4797 $last_outdented_line_at;
4799 $outdented_line_count++;
4800 $self->[_outdented_line_count_] = $outdented_line_count;
4801 $is_outdented_line = 1;
4805 # Make preliminary leading whitespace. It could get changed
4806 # later by entabbing, so we have to keep track of any changes
4807 # to the leading_space_count from here on.
4808 my $leading_string =
4809 $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
4810 my $leading_string_length = length($leading_string);
4812 # Unpack any recombination data; it was packed by
4813 # sub send_lines_to_vertical_aligner. Contents:
4815 # [0] type: 1=opening non-block 2=closing non-block
4816 # 3=opening block brace 4=closing block brace
4817 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
4818 # if closing: spaces of padding to use
4819 # [2] sequence number of container
4820 # [3] valid flag: do not append if this flag is false
4822 my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
4824 if ($rvertical_tightness_flags) {
4826 $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
4828 ) = @{$rvertical_tightness_flags};
4831 $seqno_string = $seqno_end;
4833 # handle any cached line ..
4834 # either append this line to it or write it out
4835 # Note: the function length() is used in this next test out of caution.
4836 # All testing has shown that the variable $cached_line_text_length is
4837 # correct, but its calculation is complex and a loss of cached text
4838 # would be a disaster.
4839 if ( length($cached_line_text) ) {
4841 # Dump an invalid cached line
4842 if ( !$cached_line_valid ) {
4843 $self->valign_output_step_C(
4844 $cached_line_text, $cached_line_leading_space_count,
4845 $last_level_written, $cached_line_Kend
4849 # Handle cached line ending in OPENING tokens
4850 elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
4852 my $gap = $leading_space_count - $cached_line_text_length;
4854 # handle option of just one tight opening per line:
4855 if ( $cached_line_flag == 1 ) {
4856 if ( defined($open_or_close) && $open_or_close == 1 ) {
4861 # Do not join the lines if this might produce a one-line
4862 # container which exceeds the maximum line length. This is
4863 # necessary prevent blinking, particularly with the combination
4864 # -xci -pvt=2. In that case a one-line block alternately forms
4865 # and breaks, causing -xci to alternately turn on and off (case
4867 # Patched to fix cases b656 b862 b971 b972: always do the check
4868 # if -vmll is set. The reason is that the -vmll option can
4869 # cause changes in the maximum line length, leading to blinkers
4873 && ( $self->[_rOpts_variable_maximum_line_length_]
4874 || ( defined($level_end) && $level > $level_end ) )
4877 my $test_line_length =
4878 $cached_line_text_length + $gap + $str_length;
4879 my $maximum_line_length =
4880 $self->maximum_line_length_for_level($last_level_written);
4882 # Add a small tolerance in the length test (fixes case b862)
4883 if ( $test_line_length > $maximum_line_length - 2 ) {
4888 if ( $gap >= 0 && defined($seqno_beg) ) {
4889 $leading_string = $cached_line_text . ' ' x $gap;
4890 $leading_string_length = $cached_line_text_length + $gap;
4891 $leading_space_count = $cached_line_leading_space_count;
4892 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
4893 $level = $last_level_written;
4896 $self->valign_output_step_C(
4897 $cached_line_text, $cached_line_leading_space_count,
4898 $last_level_written, $cached_line_Kend
4903 # Handle cached line ending in CLOSING tokens
4906 $cached_line_text . ' ' x $cached_line_flag . $str;
4907 my $test_line_length =
4908 $cached_line_text_length + $cached_line_flag + $str_length;
4911 # The new line must start with container
4914 # The container combination must be okay..
4917 # okay to combine like types
4918 ( $open_or_close == $cached_line_type )
4920 # closing block brace may append to non-block
4921 || ( $cached_line_type == 2 && $open_or_close == 4 )
4923 # something like ');'
4924 || ( !$open_or_close && $cached_line_type == 2 )
4928 # The combined line must fit
4930 $test_line_length <=
4931 $self->maximum_line_length_for_level(
4932 $last_level_written)
4937 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
4939 # Patch to outdent closing tokens ending # in ');' If we
4940 # are joining a line like ');' to a previous stacked set of
4941 # closing tokens, then decide if we may outdent the
4942 # combined stack to the indentation of the ');'. Since we
4943 # should not normally outdent any of the other tokens more
4944 # than the indentation of the lines that contained them, we
4945 # will only do this if all of the corresponding opening
4946 # tokens were on the same line. This can happen with -sot
4949 # For example, it is ok here:
4950 # __PACKAGE__->load_components( qw(
4955 # But, for example, we do not outdent in this example
4956 # because that would put the closing sub brace out farther
4957 # than the opening sub brace:
4959 # perltidy -sot -sct
4961 # '<Control-f>' => sub {
4963 # my $e = $c->XEvent;
4964 # itemsUnderArea $c;
4968 && $cached_line_text =~ /^[\)\}\]\s]*$/ )
4971 # The way to tell this is if the stacked sequence
4972 # numbers of this output line are the reverse of the
4973 # stacked sequence numbers of the previous non-blank
4974 # line of sequence numbers. So we can join if the
4975 # previous nonblank string of tokens is the mirror
4976 # image. For example if stack )}] is 13:8:6 then we
4977 # are looking for a leading stack like [{( which
4978 # is 6:8:13. We only need to check the two ends,
4979 # because the intermediate tokens must fall in order.
4980 # Note on speed: having to split on colons and
4981 # eliminate multiple colons might appear to be slow,
4982 # but it's not an issue because we almost never come
4983 # through here. In a typical file we don't.
4985 $seqno_string =~ s/^:+//;
4986 $last_nonblank_seqno_string =~ s/^:+//;
4987 $seqno_string =~ s/:+/:/g;
4988 $last_nonblank_seqno_string =~ s/:+/:/g;
4990 # how many spaces can we outdent?
4992 $cached_line_leading_space_count -
4993 $leading_space_count;
4995 && length($seqno_string)
4996 && length($last_nonblank_seqno_string) ==
4997 length($seqno_string) )
5000 ( split /:/, $last_nonblank_seqno_string );
5001 my @seqno_now = ( split /:/, $seqno_string );
5004 && $seqno_now[-1] == $seqno_last[0]
5005 && $seqno_now[0] == $seqno_last[-1] )
5009 # for absolute safety, be sure we only remove
5011 my $ws = substr( $test_line, 0, $diff );
5012 if ( ( length($ws) == $diff )
5016 $test_line = substr( $test_line, $diff );
5017 $cached_line_leading_space_count -= $diff;
5018 $last_level_written =
5019 $self->level_change(
5020 $cached_line_leading_space_count,
5021 $diff, $last_level_written );
5022 $self->reduce_valign_buffer_indentation(
5026 # shouldn't happen, but not critical:
5028 ## ERROR transferring indentation here
5035 $str_length = $test_line_length;
5036 $leading_string = "";
5037 $leading_string_length = 0;
5038 $leading_space_count = $cached_line_leading_space_count;
5039 $level = $last_level_written;
5042 $self->valign_output_step_C(
5043 $cached_line_text, $cached_line_leading_space_count,
5044 $last_level_written, $cached_line_Kend
5049 $cached_line_type = 0;
5050 $cached_line_text = "";
5051 $cached_line_text_length = 0;
5052 $cached_line_Kend = undef;
5054 # make the line to be written
5055 my $line = $leading_string . $str;
5056 my $line_length = $leading_string_length + $str_length;
5058 # Safety check: be sure that a line to be cached as a stacked block
5059 # brace line ends in the appropriate opening or closing block brace.
5060 # This should always be the case if the caller set flags correctly.
5061 # Code '3' is for -sobb, code '4' is for -scbb.
5062 if ($open_or_close) {
5063 if ( $open_or_close == 3 && $line !~ /\{\s*$/
5064 || $open_or_close == 4 && $line !~ /\}\s*$/ )
5070 # write or cache this line ...
5071 # fix for case b999: do not cache an outdented line
5072 if ( !$open_or_close || $side_comment_length > 0 || $is_outdented_line )
5074 $self->valign_output_step_C( $line, $leading_space_count, $level,
5078 $cached_line_text = $line;
5079 $cached_line_text_length = $line_length;
5080 $cached_line_type = $open_or_close;
5081 $cached_line_flag = $tightness_flag;
5082 $cached_seqno = $seqno;
5083 $cached_line_valid = $valid;
5084 $cached_line_leading_space_count = $leading_space_count;
5085 $cached_seqno_string = $seqno_string;
5086 $cached_line_Kend = $Kend;
5089 $self->[_last_level_written_] = $level;
5090 $self->[_last_side_comment_length_] = $side_comment_length;
5095 ###############################
5096 # CODE SECTION 8: Output Step C
5097 ###############################
5099 { ## closure for sub valign_output_step_C
5101 # Vertical alignment buffer used by valign_output_step_C
5102 my $valign_buffer_filling;
5105 sub initialize_valign_buffer {
5106 @valign_buffer = ();
5107 $valign_buffer_filling = "";
5111 sub dump_valign_buffer {
5113 if (@valign_buffer) {
5114 foreach (@valign_buffer) {
5115 $self->valign_output_step_D( @{$_} );
5117 @valign_buffer = ();
5119 $valign_buffer_filling = "";
5123 sub reduce_valign_buffer_indentation {
5125 my ( $self, $diff ) = @_;
5126 if ( $valign_buffer_filling && $diff ) {
5127 my $max_valign_buffer = @valign_buffer;
5128 foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
5129 my ( $line, $leading_space_count, $level, $Kend ) =
5130 @{ $valign_buffer[$i] };
5131 my $ws = substr( $line, 0, $diff );
5132 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
5133 $line = substr( $line, $diff );
5135 if ( $leading_space_count >= $diff ) {
5136 $leading_space_count -= $diff;
5138 $self->level_change( $leading_space_count, $diff,
5141 $valign_buffer[$i] =
5142 [ $line, $leading_space_count, $level, $Kend ];
5148 sub valign_output_step_C {
5150 ###############################################################
5151 # This is Step C in writing vertically aligned lines.
5152 # Lines are either stored in a buffer or passed along to the next step.
5153 # The reason for storing lines is that we may later want to reduce their
5154 # indentation when -sot and -sct are both used.
5155 ###############################################################
5156 my ( $self, @args ) = @_;
5158 my $seqno_string = get_seqno_string();
5159 my $last_nonblank_seqno_string = get_last_nonblank_seqno_string();
5161 # Dump any saved lines if we see a line with an unbalanced opening or
5163 $self->dump_valign_buffer()
5164 if ( $seqno_string && $valign_buffer_filling );
5166 # Either store or write this line
5167 if ($valign_buffer_filling) {
5168 push @valign_buffer, [@args];
5171 $self->valign_output_step_D(@args);
5174 # For lines starting or ending with opening or closing tokens..
5175 if ($seqno_string) {
5176 $last_nonblank_seqno_string = $seqno_string;
5177 set_last_nonblank_seqno_string($seqno_string);
5179 # Start storing lines when we see a line with multiple stacked
5181 # patch for RT #94354, requested by Colin Williams
5182 if ( $seqno_string =~ /^\d+(\:+\d+)+$/
5183 && $args[0] !~ /^[\}\)\]\:\?]/ )
5186 # This test is efficient but a little subtle: The first test
5187 # says that we have multiple sequence numbers and hence
5188 # multiple opening or closing tokens in this line. The second
5189 # part of the test rejects stacked closing and ternary tokens.
5190 # So if we get here then we should have stacked unbalanced
5193 # Here is a complex example:
5195 # Foo($Bar[0], { # (side comment)
5199 # The first line has sequence 6::4. It does not begin with
5200 # a closing token or ternary, so it passes the test and must be
5201 # stacked opening tokens.
5203 # The last line has sequence 4:6 but is a stack of closing
5204 # tokens, so it gets rejected.
5206 # Note that the sequence number of an opening token for a qw
5207 # quote is a negative number and will be rejected. For
5208 # example, for the following line: skip_symbols([qw(
5209 # $seqno_string='10:5:-1'. It would be okay to accept it but I
5210 # decided not to do this after testing.
5212 $valign_buffer_filling = $seqno_string;
5220 ###############################
5221 # CODE SECTION 9: Output Step D
5222 ###############################
5224 sub valign_output_step_D {
5226 ###############################################################
5227 # This is Step D in writing vertically aligned lines.
5228 # It is the end of the vertical alignment pipeline.
5229 # Write one vertically aligned line of code to the output object.
5230 ###############################################################
5232 my ( $self, $line, $leading_space_count, $level, $Kend ) = @_;
5234 # The line is currently correct if there is no tabbing (recommended!)
5235 # We may have to lop off some leading spaces and replace with tabs.
5236 if ( $leading_space_count > 0 ) {
5238 my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
5239 my $rOpts_tabs = $self->[_rOpts_tabs_];
5240 my $rOpts_entab_leading_whitespace =
5241 $self->[_rOpts_entab_leading_whitespace_];
5243 # Nothing to do if no tabs
5244 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
5245 || $rOpts_indent_columns <= 0 )
5251 # Handle entab option
5252 elsif ($rOpts_entab_leading_whitespace) {
5254 # Patch 12-nov-2018 based on report from Glenn. Extra padding was
5255 # not correctly entabbed, nor were side comments: Increase leading
5256 # space count for a padded line to get correct tabbing
5257 if ( $line =~ /^(\s+)(.*)$/ ) {
5258 my $spaces = length($1);
5259 if ( $spaces > $leading_space_count ) {
5260 $leading_space_count = $spaces;
5265 $leading_space_count % $rOpts_entab_leading_whitespace;
5267 int( $leading_space_count / $rOpts_entab_leading_whitespace );
5268 my $leading_string = "\t" x $tab_count . ' ' x $space_count;
5269 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
5270 substr( $line, 0, $leading_space_count ) = $leading_string;
5274 # shouldn't happen - program error counting whitespace
5278 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
5283 # Handle option of one tab per level
5285 my $leading_string = ( "\t" x $level );
5287 $leading_space_count - $level * $rOpts_indent_columns;
5290 if ( $space_count < 0 ) {
5292 # But it could be an outdented comment
5293 if ( $line !~ /^\s*#/ ) {
5296 "Error entabbing in valign_output_step_D: for level=$level count=$leading_space_count\n"
5299 $leading_string = ( ' ' x $leading_space_count );
5302 $leading_string .= ( ' ' x $space_count );
5304 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
5305 substr( $line, 0, $leading_space_count ) = $leading_string;
5309 # shouldn't happen - program error counting whitespace
5310 # we'll skip entabbing
5313 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
5318 my $file_writer_object = $self->[_file_writer_object_];
5319 $file_writer_object->write_code_line( $line . "\n", $Kend );
5324 { ## closure for sub get_leading_string
5326 my @leading_string_cache;
5328 sub initialize_leading_string_cache {
5329 @leading_string_cache = ();
5333 sub get_leading_string {
5335 # define the leading whitespace string for this line..
5336 my ( $self, $leading_whitespace_count, $group_level ) = @_;
5338 # Handle case of zero whitespace, which includes multi-line quotes
5339 # (which may have a finite level; this prevents tab problems)
5340 if ( $leading_whitespace_count <= 0 ) {
5344 # look for previous result
5345 elsif ( $leading_string_cache[$leading_whitespace_count] ) {
5346 return $leading_string_cache[$leading_whitespace_count];
5349 # must compute a string for this number of spaces
5352 # Handle simple case of no tabs
5353 my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
5354 my $rOpts_tabs = $self->[_rOpts_tabs_];
5355 my $rOpts_entab_leading_whitespace =
5356 $self->[_rOpts_entab_leading_whitespace_];
5358 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
5359 || $rOpts_indent_columns <= 0 )
5361 $leading_string = ( ' ' x $leading_whitespace_count );
5364 # Handle entab option
5365 elsif ($rOpts_entab_leading_whitespace) {
5367 $leading_whitespace_count % $rOpts_entab_leading_whitespace;
5368 my $tab_count = int(
5369 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
5370 $leading_string = "\t" x $tab_count . ' ' x $space_count;
5373 # Handle option of one tab per level
5375 $leading_string = ( "\t" x $group_level );
5377 $leading_whitespace_count - $group_level * $rOpts_indent_columns;
5380 if ( $space_count < 0 ) {
5383 "Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n"
5387 $leading_string = ( ' ' x $leading_whitespace_count );
5390 $leading_string .= ( ' ' x $space_count );
5393 $leading_string_cache[$leading_whitespace_count] = $leading_string;
5394 return $leading_string;
5396 } # end get_leading_string
5398 ##########################
5399 # CODE SECTION 10: Summary
5400 ##########################
5402 sub report_anything_unusual {
5405 my $outdented_line_count = $self->[_outdented_line_count_];
5406 if ( $outdented_line_count > 0 ) {
5407 $self->write_logfile_entry(
5408 "$outdented_line_count long lines were outdented:\n");
5409 my $first_outdented_line_at = $self->[_first_outdented_line_at_];
5410 $self->write_logfile_entry(
5411 " First at output line $first_outdented_line_at\n");
5413 if ( $outdented_line_count > 1 ) {
5414 my $last_outdented_line_at = $self->[_last_outdented_line_at_];
5415 $self->write_logfile_entry(
5416 " Last at output line $last_outdented_line_at\n");
5418 $self->write_logfile_entry(
5419 " use -noll to prevent outdenting, -l=n to increase line length\n"
5421 $self->write_logfile_entry("\n");