- This version is about 20% faster than the previous version due to optimizations
made with the help of Devel::NYTProf.
+ - A better test for convergence has been added. When iterations are requested,
+ the new test will stop after the first pass if no changes in line break
+ locations are made. Previously, at least two passes were required to verify
+ convergnece unless the output stream had the same checksum as the input stream.
+ Extensive testing has been made to verify the correctness of the new test.
+
- Line breaks are now automatically placed after 'use overload' to
improve formatting when there are numerous overloaded operators. For
example
use Perl::Tidy::VerticalAligner;
local $| = 1;
+# this can be turned on for extra checking during development
+use constant DEVEL_MODE => 0;
+
use vars qw{
$VERSION
@ISA
my $debugger_object_final = $debugger_object;
my $logger_object_final = $logger_object;
my $fh_tee_final = $fh_tee;
+ my $iteration_of_formatter_convergence;
foreach my $iter ( 1 .. $max_iterations ) {
#---------------------------------------------------------------
$source_object->close_input_file();
+ # see if the formatter is converged
+ if ( $max_iterations > 1
+ && !defined($iteration_of_formatter_convergence)
+ && $formatter->can('get_convergence_check') )
+ {
+ if ( $formatter->get_convergence_check() ) {
+ $iteration_of_formatter_convergence = $iter;
+ }
+ }
+
# line source for next iteration (if any) comes from the current
# temporary output buffer
if ( $iter < $max_iterations ) {
# stop iterations if errors or converged
my $stop_now = $tokenizer->report_tokenization_errors();
$stop_now ||= $tokenizer->get_unexpected_error_count();
+ my $stopping_on_error = $stop_now;
if ($stop_now) {
$convergence_log_message = <<EOM;
Stopping iterations because of severe errors.
}
elsif ($do_convergence_test) {
+ # FIXME: future convergence test
+ ## $stop_now ||= defined($iteration_of_formatter_convergence);
+
my $digest = $md5_hex->($sink_buffer);
if ( !defined( $saw_md5{$digest} ) ) {
$saw_md5{$digest} = $iter;
if ($stop_now) {
+ if (DEVEL_MODE) { #<<<
+ if ( defined($iteration_of_formatter_convergence) ) {
+ if ( $iteration_of_formatter_convergence < $iter - 1 ) {
+ print STDERR
+"STRANGE Early conv in $display_name: Stopping on it=$iter, converged in formatter on $iteration_of_formatter_convergence\n";
+ }
+ }
+ elsif ( !$stopping_on_error ) {
+ print STDERR
+"STRANGE no conv in $display_name: stopping on it=$iter, but not converged in formatter\n";
+ }
+ }
+
# we are stopping the iterations early;
# copy the output stream to its final destination
$sink_object = $sink_object_final;
_line_length_error_count_ => $i++,
_max_output_line_length_ => $i++,
_max_output_line_length_at_ => $i++,
+ _rK_checklist_ => $i++,
+ _K_arrival_order_matches_ => $i++,
+ _K_sequence_error_msg_ => $i++,
+ _K_last_arrival_ => $i++,
};
}
$self->[_line_length_error_count_] = 0;
$self->[_max_output_line_length_] = 0;
$self->[_max_output_line_length_at_] = 0;
+ $self->[_rK_checklist_] = [];
+ $self->[_K_arrival_order_matches_] = 0;
+ $self->[_K_sequence_error_msg_] = "";
+ $self->[_K_last_arrival_] = -1;
bless $self, $class;
return $self;
}
+sub setup_convergence_test {
+ my ( $self, $rlist ) = @_;
+ if ( @{$rlist} ) {
+
+ # We are going to destroy the list, so make a copy
+ # and put in reverse order so we can pop values
+ my @list = @{$rlist};
+ if ( $list[0] < $list[-1] ) {
+ @list = reverse @list;
+ }
+ $self->[_rK_checklist_] = \@list;
+ }
+ $self->[_K_arrival_order_matches_] = 1;
+ $self->[_K_sequence_error_msg_] = "";
+ $self->[_K_last_arrival_] = -1;
+ return;
+}
+
+sub get_convergence_check {
+ my ($self) = @_;
+ my $rlist = $self->[_rK_checklist_];
+
+ # converged if all K arrived and in correct order
+ return $self->[_K_arrival_order_matches_] && !@{$rlist};
+}
+
+sub get_K_sequence_error_msg {
+ my ($self) = @_;
+ return $self->[_K_sequence_error_msg_];
+}
+
sub get_output_line_number {
return $_[0]->[_output_line_number_];
}
}
sub write_code_line {
- my $self = shift;
- my $a = shift;
+ my ( $self, $a, $K ) = @_;
if ( $a =~ /^\s*$/ ) {
my $rOpts = $self->[_rOpts_];
$self->[_consecutive_nonblank_lines_]++;
}
$self->write_line($a);
+
+ if ( defined($K) ) {
+
+ # Convergence check: we are checking if all defined K values arrive in
+ # the order which was defined by the caller. Quite checking if any
+ # unexpected K value arrives.
+ if ( $self->[_K_arrival_order_matches_] ) {
+ my $Kt = pop @{ $self->[_rK_checklist_] };
+ if ( !defined($Kt) || $Kt != $K ) {
+ $self->[_K_arrival_order_matches_] = 0;
+ }
+ }
+
+ # check for out-of-order arrivals of K (shouldn't happen).
+ if ( !$self->[_K_sequence_error_msg_] ) {
+ my $K_prev = $self->[_K_last_arrival_];
+ if ( $K < $K_prev ) {
+ my $str = $a;
+ chomp $str;
+ if ( length($str) > 80 ) {
+ $str = substr( $str, 0, 80 ) . "...";
+ }
+ my $msg = <<EOM;
+Lines have arrived out of order in sub 'write_code_line'
+as detected by token index K=$K arriving after index K=$K_prev. The line
+$str
+EOM
+
+ # TODO: This message should go out as a warning after testing
+ # For now it is being stored.
+ $self->[_K_sequence_error_msg_] = $msg;
+ }
+ }
+ $self->[_K_last_arrival_] = $K;
+ }
return;
}
return;
}
1;
-
_rKrange_code_without_comments_ => $i++,
_rbreak_before_Kfirst_ => $i++,
_rbreak_after_Klast_ => $i++,
+ _converged_ => $i++,
};
_rK_to_go_ => $i++,
_batch_count_ => $i++,
_rix_seqno_controlling_ci_ => $i++,
+ _batch_CODE_type_ => $i++,
};
# Sequence number assigned to the root of sequence tree.
$self->[_rKrange_code_without_comments_] = [];
$self->[_rbreak_before_Kfirst_] = {};
$self->[_rbreak_after_Klast_] = {};
+ $self->[_converged_] = 0;
# This flag will be updated later by a call to get_save_logfile()
$self->[_save_logfile_] = defined($logger_object);
}
} ## end closure for diagnostics routines
+sub get_convergence_check {
+ my ($self) = @_;
+ return $self->[_converged_];
+}
+
sub get_added_semicolon_count {
my $self = shift;
return $self->[_added_semicolon_count_];
# with spaces separating any number of items. Each item consists of three
# pieces of information:
# <optional position> <optional type> <type of container>
- # < ^ or . > <[k K f F w W]> < ( [ { >
+ # < ^ or . > < k or K > < ( [ { >
# The last character is the required container type and must be one of:
# ( = paren
# token selects to which the rule applies:
# k = any keyword
# K = any non-keyword
- # f = function
+ # f = function call
# F = not a function call
# w = function or keyword
# W = not a function or keyword
my $Klimit = $self->[_Klimit_];
my $rlines = $self->[_rlines_];
my @Krange_code_without_comments;
+ my @Klast_valign_code;
# Re-construct the arrays of tokens associated with the original input lines
# since they have probably changed due to inserting and deleting blanks
foreach my $line_of_tokens ( @{$rlines} ) {
$iline++;
my $line_type = $line_of_tokens->{_line_type};
+ my $CODE_type = $line_of_tokens->{_code_type};
if ( $line_type eq 'CODE' ) {
my @K_array;
if ( $Knext <= $Kmax ) {
$inext = $rLL->[$Knext]->[_LINE_INDEX_];
while ( $inext <= $iline ) {
- push @{K_array}, $Knext;
+ push @K_array, $Knext;
$Knext += 1;
if ( $Knext > $Kmax ) {
$inext = undef;
$Klast = $K_array[-1];
$Klast_out = $Klast;
- # Save ranges of non-comment code. This will be used by
- # sub keep_old_line_breaks.
- if ( defined($Kfirst) && $rLL->[$Kfirst]->[_TYPE_] ne '#' ) {
- push @Krange_code_without_comments, [ $Kfirst, $Klast ];
+ if ( defined($Kfirst) ) {
+
+ # Save ranges of non-comment code. This will be used by
+ # sub keep_old_line_breaks.
+ if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) {
+ push @Krange_code_without_comments, [ $Kfirst, $Klast ];
+ }
+
+ # Only save ending K indexes of code types which are blank
+ # or 'VER'. These will be used for a convergence check.
+ # See related code in sub 'send_lines_to_vertical_aligner'.
+ if ( !$CODE_type
+ || $CODE_type eq 'VER' )
+ {
+ push @Klast_valign_code, $Klast;
+ }
}
}
}
$self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments;
+ # Setup the convergence test in the FileWriter based on line-ending indexes
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->setup_convergence_test( \@Klast_valign_code );
+
return;
}
my $line_of_tokens;
my $no_internal_newlines;
my $side_comment_follows;
+ my $CODE_type;
# range of K of tokens for the current line
my ( $K_first, $K_last );
$last_nonblank_token, $last_nonblank_type,
$last_nonblank_block_type, $K_last_nonblank_code,
$K_last_last_nonblank_code, $looking_for_else,
- $is_static_block_comment,
+ $is_static_block_comment, $batch_CODE_type,
+ $last_line_had_side_comment,
);
# Called once at the start of a new file
sub initialize_process_line_of_CODE {
- $last_nonblank_token = ';';
- $last_nonblank_type = ';';
- $last_last_nonblank_token = ';';
- $last_last_nonblank_type = ';';
- $last_nonblank_block_type = "";
- $K_last_nonblank_code = undef;
- $K_last_last_nonblank_code = undef;
- $looking_for_else = 0;
- $is_static_block_comment = 0;
+ $last_nonblank_token = ';';
+ $last_nonblank_type = ';';
+ $last_last_nonblank_token = ';';
+ $last_last_nonblank_type = ';';
+ $last_nonblank_block_type = "";
+ $K_last_nonblank_code = undef;
+ $K_last_last_nonblank_code = undef;
+ $looking_for_else = 0;
+ $is_static_block_comment = 0;
+ $batch_CODE_type = "";
+ $last_line_had_side_comment = 0;
return;
}
}
++$max_index_to_go;
+ $batch_CODE_type = $CODE_type;
$K_to_go[$max_index_to_go] = $Ktoken_vars;
$types_to_go[$max_index_to_go] = $type;
$this_batch->[_ending_in_quote_] = $ending_in_quote;
$this_batch->[_max_index_to_go_] = $max_index_to_go;
$this_batch->[_rK_to_go_] = \@K_to_go;
+ $this_batch->[_batch_CODE_type_] = $batch_CODE_type;
# The flag $is_static_block_comment applies to the line which just
# arrived. So it only applies if we are outputting that line.
$self->[_this_batch_] = $this_batch;
+ $last_line_had_side_comment =
+ $max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#';
+
$self->grind_batch_of_CODE();
# Done .. this batch is history
# lengths below the requested maximum line length.
$line_of_tokens = $my_line_of_tokens;
+ $CODE_type = $line_of_tokens->{_code_type};
my $input_line_number = $line_of_tokens->{_line_number};
my $input_line = $line_of_tokens->{_line_text};
- my $CODE_type = $line_of_tokens->{_code_type};
# initialize closure variables
my $rK_range = $line_of_tokens->{_rK_range};
# only if allowed
&& $rOpts->{'blanks-before-comments'}
- # if this is NOT an empty comment line
- && $rtok_first->[_TOKEN_] ne '#'
+ # if this is NOT an empty comment
+ && ( $rtok_first->[_TOKEN_] ne '#'
+
+ # FIXME: FUTURE UPDATE; still needs to be coordinated with user parameters
+ # unless following a side comment (otherwise need to insert
+ # blank to prevent creating a hanging side comment)
+ ) #|| $last_line_had_side_comment )
# not after a short line ending in an opening token
# because we already have space above this comment.
$self->end_batch();
}
else {
- $self->flush(); # switching to new output stream
+
+ # switching to new output stream
+ $self->flush();
+
+ # Note that last arg in call here is 'undef' for comments
$file_writer_object->write_code_line(
- $rtok_first->[_TOKEN_] . "\n" );
+ $rtok_first->[_TOKEN_] . "\n", undef );
$self->[_last_line_leading_type_] = '#';
}
return;
$Kend = $Kend_next;
$type_end = $type_end_next;
+ # Only forward ending K values of non-comments down the pipeline.
+ # This is equivalent to checking that the last CODE_type is blank or
+ # equal to 'VER'. See also sub resync_lines_and_tokens for related
+ # coding. Note that '$batch_CODE_type' is the code type of the line
+ # to which the ending token belongs.
+ my $batch_CODE_type = $this_batch->[_batch_CODE_type_];
+ my $Kend_code =
+ $batch_CODE_type && $batch_CODE_type ne 'VER' ? undef : $Kend;
+
# We use two slightly different definitions of level jump at the end
# of line:
# $ljump is the level jump needed by 'sub set_adjusted_indentation'
$rvalign_hash->{batch_count} = $batch_count;
$rvalign_hash->{break_alignment_before} = $break_alignment_before;
$rvalign_hash->{break_alignment_after} = $break_alignment_after;
+ $rvalign_hash->{Kend} = $Kend_code;
my $vao = $self->[_vertical_aligner_object_];
$vao->valign_input($rvalign_hash);
$file_writer_object->report_line_length_errors();
+ $self->[_converged_] = $file_writer_object->get_convergence_check();
+
return;
}
my $batch_count = $rline_hash->{batch_count};
my $break_alignment_before = $rline_hash->{break_alignment_before};
my $break_alignment_after = $rline_hash->{break_alignment_after};
+ my $Kend = $rline_hash->{Kend};
+
+ # The index '$Kend' is a value which passed along with the line text to sub
+ # 'write_code_line' for a convergence check.
# number of fields is $jmax
# number of tokens between fields is $jmax-1
# Note that for a comment group we are not storing a line
# but rather just the text and its length.
- $self->push_group_line( [ $rfields->[0], $rfield_lengths->[0] ] );
+ $self->push_group_line(
+ [ $rfields->[0], $rfield_lengths->[0], $Kend ] );
return;
}
else {
{
$self->[_group_type_] = 'COMMENT';
$self->[_comment_leading_space_count_] = $leading_space_count;
- $self->push_group_line( [ $rfields->[0], $rfield_lengths->[0] ] );
+ $self->push_group_line(
+ [ $rfields->[0], $rfield_lengths->[0], $Kend ] );
return;
}
side_comment_length => 0,
outdent_long_lines => $outdent_long_lines,
rvertical_tightness_flags => $rvertical_tightness_flags,
- level => $level
+ level => $level,
+ Kend => $Kend,
}
);
j_terminal_match => $j_terminal_match,
is_forced_break => $is_forced_break,
end_group => $break_alignment_after,
+ Kend => $Kend,
}
);
my $outdent_long_lines = 0;
foreach my $item ( @{$rgroup_lines} ) {
- my ( $line, $line_len ) = @{$item};
+ my ( $line, $line_len, $Kend ) = @{$item};
$self->valign_output_step_B(
{
leading_space_count => $leading_space_count,
outdent_long_lines => $outdent_long_lines,
rvertical_tightness_flags => "",
level => $group_level,
+ Kend => $Kend,
}
);
}
my $outdent_long_lines = $line->get_outdent_long_lines();
my $maximum_field_index = $line->get_jmax();
my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
+ my $Kend = $line->get_Kend();
# add any extra spaces
if ( $leading_space_count > $group_leader_length ) {
outdent_long_lines => $outdent_long_lines,
rvertical_tightness_flags => $rvertical_tightness_flags,
level => $level,
+ Kend => $Kend,
}
);
return;
my $cached_line_valid;
my $cached_line_leading_space_count;
my $cached_seqno_string;
+ my $cached_line_Kend;
my $seqno_string;
my $last_nonblank_seqno_string;
$cached_line_valid = 0;
$cached_line_leading_space_count = 0;
$cached_seqno_string = "";
+ $cached_line_Kend = undef;
# These vars hold a string of sequence numbers joined together used by
# the cache
$self->valign_output_step_C(
$cached_line_text,
$cached_line_leading_space_count,
- $self->[_last_level_written_]
+ $self->[_last_level_written_],
+ $cached_line_Kend,
);
$cached_line_type = 0;
$cached_line_text = "";
$cached_line_text_length = 0;
$cached_seqno_string = "";
+ $cached_line_Kend = undef,;
}
return;
}
my $outdent_long_lines = $rinput->{outdent_long_lines};
my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
my $level = $rinput->{level};
+ my $Kend = $rinput->{Kend};
my $last_level_written = $self->[_last_level_written_];
# Dump an invalid cached line
if ( !$cached_line_valid ) {
- $self->valign_output_step_C( $cached_line_text,
- $cached_line_leading_space_count,
- $last_level_written );
+ $self->valign_output_step_C(
+ $cached_line_text, $cached_line_leading_space_count,
+ $last_level_written, $cached_line_Kend
+ );
}
# Handle cached line ending in OPENING tokens
$level = $last_level_written;
}
else {
- $self->valign_output_step_C( $cached_line_text,
- $cached_line_leading_space_count,
- $last_level_written );
+ $self->valign_output_step_C(
+ $cached_line_text, $cached_line_leading_space_count,
+ $last_level_written, $cached_line_Kend
+ );
}
}
$level = $last_level_written;
}
else {
- $self->valign_output_step_C( $cached_line_text,
- $cached_line_leading_space_count,
- $last_level_written );
+ $self->valign_output_step_C(
+ $cached_line_text, $cached_line_leading_space_count,
+ $last_level_written, $cached_line_Kend
+ );
}
}
}
$cached_line_type = 0;
$cached_line_text = "";
$cached_line_text_length = 0;
+ $cached_line_Kend = undef;
# make the line to be written
my $line = $leading_string . $str;
# write or cache this line
if ( !$open_or_close || $side_comment_length > 0 ) {
- $self->valign_output_step_C( $line, $leading_space_count, $level );
+ $self->valign_output_step_C( $line, $leading_space_count, $level,
+ $Kend );
}
else {
$cached_line_text = $line;
$cached_line_valid = $valid;
$cached_line_leading_space_count = $leading_space_count;
$cached_seqno_string = $seqno_string;
+ $cached_line_Kend = $Kend;
}
$self->[_last_level_written_] = $level;
if ( $valign_buffer_filling && $diff ) {
my $max_valign_buffer = @valign_buffer;
foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
- my ( $line, $leading_space_count, $level ) =
+ my ( $line, $leading_space_count, $level, $Kend ) =
@{ $valign_buffer[$i] };
my $ws = substr( $line, 0, $diff );
if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
$self->level_change( $leading_space_count, $diff,
$level );
}
- $valign_buffer[$i] = [ $line, $leading_space_count, $level ];
+ $valign_buffer[$i] =
+ [ $line, $leading_space_count, $level, $Kend ];
}
}
return;
# Write one vertically aligned line of code to the output object.
###############################################################
- my ( $self, $line, $leading_space_count, $level ) = @_;
+ my ( $self, $line, $leading_space_count, $level, $Kend ) = @_;
# The line is currently correct if there is no tabbing (recommended!)
# We may have to lop off some leading spaces and replace with tabs.
}
}
my $file_writer_object = $self->[_file_writer_object_];
- $file_writer_object->write_code_line( $line . "\n" );
+ $file_writer_object->write_code_line( $line . "\n", $Kend );
+
return;
}
_j_terminal_match_ => $i++,
_is_forced_break_ => $i++,
_end_group_ => $i++,
+ _Kend_ => $i++,
};
}
$self->[_j_terminal_match_] = $ri->{j_terminal_match};
$self->[_is_forced_break_] = $ri->{is_forced_break};
$self->[_end_group_] = $ri->{end_group};
+ $self->[_Kend_] = $ri->{Kend};
$self->[_ralignments_] = [];
sub get_rfield_lengths { return $_[0]->[_rfield_lengths_] }
sub get_rpatterns { return $_[0]->[_rpatterns_] }
sub get_indentation { return $_[0]->[_indentation_] }
+ sub get_Kend { return $_[0]->[_Kend_] }
sub get_j_terminal_match {
return $_[0]->[_j_terminal_match_];
}
1;
+
=over 4
+=item B<improved convergence test>
+
+A better test for convergence has been added. When iterations are requested,
+the new test will stop after the first pass if no changes in line break
+locations are made. Previously, at least two passes were required to verify
+convergnece unless the output stream had the same checksum as the input stream.
+Extensive testing has been made to verify the correctness of the new test.
+This update was made 23 Nov 2020.
+
=item B<fixed problem with vertical alignments involving 'if' statements>
An update was made to break vertical alignment when a new sequence of if-like