#
#####################################################################
+# Index...
+# CODE SECTION 1: Preliminary code and global definitions up to sub new
+# CODE SECTION 2: Some Basic Utilities
+# CODE SECTION 3: Check and process options
+# CODE SECTION 4: Receive lines from the tokenizer
+# CODE SECTION 5: Pre-process the entire file
+# CODE SECTION 6: Process line-by-line
+# CODE SECTION 7: Process lines of code
+# CODE SECTION 8: Utilities for setting breakpoints
+# CODE SECTION 9: Process batches of code
+# CODE SECTION 10: Code to break long statments
+# CODE SECTION 11: Code to break long lists
+# CODE SECTION 12: Code for setting indentation
+# CODE SECTION 13: Preparing batches for vertical alignment
+# CODE SECTION 14: Code for creating closing side comments
+# CODE SECTION 15: Summarize
+
+#######################################################################
+# CODE SECTION 1: Preliminary code and global definitions up to sub new
+#######################################################################
+
package Perl::Tidy::Formatter;
use strict;
use warnings;
-{ #<<< begin package Perl::Tidy::Formatter
+{ #<<< A non-indenting brace to contain all lexical variables
use Carp;
our $VERSION = '20200907.01';
# The Tokenizer will be loaded with the Formatter
##use Perl::Tidy::Tokenizer; # for is_keyword()
+sub AUTOLOAD {
+
+ # Catch any undefined sub calls so that we are sure to get
+ # some diagnostic information. This sub should never be called
+ # except for a programming error.
+ our $AUTOLOAD;
+ return if ( $AUTOLOAD eq 'DESTROY' );
+ my ( $pkg, $fname, $lno ) = caller();
+ print STDERR <<EOM;
+======================================================================
+Unexpected call to Autoload looking for sub $AUTOLOAD
+Called from package: '$pkg'
+Called from File '$fname' at line '$lno'
+This error is probably due to a recent programming change
+======================================================================
+EOM
+ exit 1;
+}
+
+sub DESTROY {
+ my $self = shift;
+ $self->_decrement_count();
+ return;
+}
+
sub Die {
my ($msg) = @_;
Perl::Tidy::Die($msg);
return;
}
+sub Fault {
+ my ($msg) = @_;
+
+ # This routine is called for errors that really should not occur
+ # except if there has been a bug introduced by a recent program change
+ my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
+ my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
+ my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
+ my $input_stream_name = get_input_stream_name();
+
+ Die(<<EOM);
+==============================================================================
+While operating on input stream with name: '$input_stream_name'
+A fault was detected at line $line0 of sub '$subroutine1'
+in file '$filename1'
+which was called from line $line1 of sub '$subroutine2'
+Message: '$msg'
+This is probably an error introduced by a recent programming change.
+==============================================================================
+EOM
+
+ # We shouldn't get here, but this return is to keep Perl-Critic from
+ # complaining.
+ return;
+}
+
sub Exit {
my ($msg) = @_;
Perl::Tidy::Exit($msg);
sub _decrement_count { return --$_count }
} ## end closure to count instanes
-sub trim {
+sub new {
- # trim leading and trailing whitespace from a string
- my $str = shift;
- $str =~ s/\s+$//;
- $str =~ s/^\s+//;
- return $str;
-}
+ my ( $class, @args ) = @_;
-sub max {
- my (@vals) = @_;
- my $max = shift @vals;
- for (@vals) { $max = $_ > $max ? $_ : $max }
- return $max;
+ # we are given an object with a write_line() method to take lines
+ my %defaults = (
+ sink_object => undef,
+ diagnostics_object => undef,
+ logger_object => undef,
+ length_function => sub { return length( $_[0] ) },
+ fh_tee => undef,
+ );
+ my %args = ( %defaults, @args );
+
+ my $length_function = $args{length_function};
+ my $fh_tee = $args{fh_tee};
+ my $logger_object = $args{logger_object};
+ my $diagnostics_object = $args{diagnostics_object};
+
+ # we create another object with a get_line() and peek_ahead() method
+ my $sink_object = $args{sink_object};
+ my $file_writer_object =
+ Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
+
+ # initialize closure variables...
+ set_logger_object($logger_object);
+ set_diagnostics_object($diagnostics_object);
+ initialize_gnu_vars();
+ initialize_csc_vars();
+ initialize_scan_list();
+ initialize_saved_opening_indentation();
+ initialize_process_line_of_CODE();
+ initialize_grind_batch_of_CODE();
+ initialize_adjusted_indentation();
+ initialize_postponed_breakpoint();
+ prepare_for_next_batch();
+
+ my $vertical_aligner_object = Perl::Tidy::VerticalAligner->new(
+ rOpts => $rOpts,
+ file_writer_object => $file_writer_object,
+ logger_object => $logger_object,
+ diagnostics_object => $diagnostics_object,
+ length_function => $length_function
+ );
+
+ if ( $rOpts->{'entab-leading-whitespace'} ) {
+ write_logfile_entry(
+"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
+ );
+ }
+ elsif ( $rOpts->{'tabs'} ) {
+ write_logfile_entry("Indentation will be with a tab character\n");
+ }
+ else {
+ write_logfile_entry(
+ "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
+ }
+
+ # Initialize the $self array reference.
+ # To add an item, first add a constant index in the BEGIN block above.
+ my $self = [];
+
+ # Basic data structures...
+ $self->[_rlines_] = []; # = ref to array of lines of the file
+ $self->[_rlines_new_] = []; # = ref to array of output lines
+ # (FOR FUTURE DEVELOPMENT)
+ $self->[_rLL_] = []; # = ref to array with all tokens
+ # in the file. LL originally meant
+ # 'Linked List'. Linked lists were a
+ # bad idea but LL is easy to type.
+ $self->[_Klimit_] = undef; # = maximum K index for rLL. This is
+ # needed to catch any autovivification
+ # problems.
+ $self->[_K_opening_container_] = {}; # for quickly traversing structure
+ $self->[_K_closing_container_] = {}; # for quickly traversing structure
+ $self->[_K_opening_ternary_] = {}; # for quickly traversing structure
+ $self->[_K_closing_ternary_] = {}; # for quickly traversing structure
+ $self->[_rK_phantom_semicolons_] =
+ undef; # for undoing phantom semicolons if iterating
+ $self->[_rtype_count_by_seqno_] = {};
+ $self->[_ris_broken_container_] = {};
+ $self->[_rhas_broken_container_] = {};
+ $self->[_rparent_of_seqno_] = {};
+ $self->[_rchildren_of_seqno_] = {};
+ $self->[_rpaired_to_inner_container_] = {};
+ $self->[_rbreak_container_] = {}; # prevent one-line blocks
+ $self->[_rshort_nested_] = {}; # blocks not forced open
+ $self->[_length_function_] = $length_function;
+
+ # Some objects...
+ $self->[_fh_tee_] = $fh_tee;
+ $self->[_sink_object_] = $sink_object;
+ $self->[_file_writer_object_] = $file_writer_object;
+ $self->[_vertical_aligner_object_] = $vertical_aligner_object;
+
+ # Reference to the batch being processed
+ $self->[_this_batch_] = [];
+
+ # Memory of processed text...
+ $self->[_last_last_line_leading_level_] = 0;
+ $self->[_last_line_leading_level_] = 0;
+ $self->[_last_line_leading_type_] = '#';
+ $self->[_last_output_short_opening_token_] = 0;
+ $self->[_added_semicolon_count_] = 0;
+ $self->[_first_added_semicolon_at_] = 0;
+ $self->[_last_added_semicolon_at_] = 0;
+ $self->[_deleted_semicolon_count_] = 0;
+ $self->[_first_deleted_semicolon_at_] = 0;
+ $self->[_last_deleted_semicolon_at_] = 0;
+ $self->[_embedded_tab_count_] = 0;
+ $self->[_first_embedded_tab_at_] = 0;
+ $self->[_last_embedded_tab_at_] = 0;
+ $self->[_first_tabbing_disagreement_] = 0;
+ $self->[_last_tabbing_disagreement_] = 0;
+ $self->[_tabbing_disagreement_count_] = 0;
+ $self->[_in_tabbing_disagreement_] = 0;
+ $self->[_saw_VERSION_in_this_file_] = !$rOpts->{'pass-version-line'};
+ $self->[_saw_END_or_DATA_] = 0;
+
+ # Hashes related to container welding...
+ $self->[_rnested_pairs_] = [];
+ $self->[_radjusted_levels_] = [];
+ $self->[_rweld_len_left_closing_] = {};
+ $self->[_rweld_len_right_closing_] = {};
+ $self->[_rweld_len_left_opening_] = {};
+ $self->[_rweld_len_right_opening_] = {};
+
+ $self->[_rspecial_side_comment_type_] = {};
+
+ bless $self, $class;
+
+ # Safety check..this is not a class yet
+ if ( _increment_count() > 1 ) {
+ confess
+"Attempt to create more than 1 object in $class, which is not a true class yet\n";
+ }
+ return $self;
}
-sub min {
- my (@vals) = @_;
- my $min = shift @vals;
- for (@vals) { $min = $_ < $min ? $_ : $min }
- return $min;
+######################################
+# CODE SECTION 2: Some Basic Utilities
+######################################
+
+sub set_rLL_max_index {
+ my $self = shift;
+
+ # Set the limit of the rLL array, assuming that it is correct.
+ # This should only be called by routines after they make changes
+ # to tokenization
+ my $rLL = $self->[_rLL_];
+ if ( !defined($rLL) ) {
+
+ # Shouldn't happen because rLL was initialized to be an array ref
+ Fault("Undefined Memory rLL");
+ }
+ my $Klimit_old = $self->[_Klimit_];
+ my $num = @{$rLL};
+ my $Klimit;
+ if ( $num > 0 ) { $Klimit = $num - 1 }
+ $self->[_Klimit_] = $Klimit;
+ return ($Klimit);
}
-sub split_words {
+sub get_rLL_max_index {
+ my $self = shift;
- # given a string containing words separated by whitespace,
- # return the list of words
- my ($str) = @_;
- return unless $str;
- $str =~ s/\s+$//;
- $str =~ s/^\s+//;
- return split( /\s+/, $str );
+ # the memory location $rLL and number of tokens should be obtained
+ # from this routine so that any autovivication can be immediately caught.
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ if ( !defined($rLL) ) {
+
+ # Shouldn't happen because rLL was initialized to be an array ref
+ Fault("Undefined Memory rLL");
+ }
+ my $num = @{$rLL};
+ if ( $num == 0 && defined($Klimit)
+ || $num > 0 && !defined($Klimit)
+ || $num > 0 && $Klimit != $num - 1 )
+ {
+
+ # Possible autovivification problem...
+ if ( !defined($Klimit) ) { $Klimit = '*' }
+ Fault("Error getting rLL: Memory items=$num and Klimit=$Klimit");
+ }
+ return ($Klimit);
}
sub check_keys {
return $vao->get_output_line_number();
}
-sub AUTOLOAD {
-
- # Catch any undefined sub calls so that we are sure to get
- # some diagnostic information. This sub should never be called
- # except for a programming error.
- our $AUTOLOAD;
- return if ( $AUTOLOAD eq 'DESTROY' );
- my ( $pkg, $fname, $lno ) = caller();
- print STDERR <<EOM;
-======================================================================
-Unexpected call to Autoload looking for sub $AUTOLOAD
-Called from package: '$pkg'
-Called from File '$fname' at line '$lno'
-This error is probably due to a recent programming change
-======================================================================
-EOM
- exit 1;
-}
-
-sub DESTROY {
- my $self = shift;
- $self->_decrement_count();
- return;
-}
-
-sub new {
-
- my ( $class, @args ) = @_;
-
- # we are given an object with a write_line() method to take lines
- my %defaults = (
- sink_object => undef,
- diagnostics_object => undef,
- logger_object => undef,
- length_function => sub { return length( $_[0] ) },
- fh_tee => undef,
- );
- my %args = ( %defaults, @args );
-
- my $length_function = $args{length_function};
- my $fh_tee = $args{fh_tee};
- my $logger_object = $args{logger_object};
- my $diagnostics_object = $args{diagnostics_object};
-
- # we create another object with a get_line() and peek_ahead() method
- my $sink_object = $args{sink_object};
- my $file_writer_object =
- Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
-
- # initialize closure variables...
- set_logger_object($logger_object);
- set_diagnostics_object($diagnostics_object);
- initialize_gnu_vars();
- initialize_csc_vars();
- initialize_scan_list();
- initialize_saved_opening_indentation();
- initialize_process_line_of_CODE();
- initialize_grind_batch_of_CODE();
- initialize_adjusted_indentation();
- initialize_postponed_breakpoint();
- prepare_for_next_batch();
-
- my $vertical_aligner_object = Perl::Tidy::VerticalAligner->new(
- rOpts => $rOpts,
- file_writer_object => $file_writer_object,
- logger_object => $logger_object,
- diagnostics_object => $diagnostics_object,
- length_function => $length_function
- );
-
- if ( $rOpts->{'entab-leading-whitespace'} ) {
- write_logfile_entry(
-"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
- );
- }
- elsif ( $rOpts->{'tabs'} ) {
- write_logfile_entry("Indentation will be with a tab character\n");
- }
- else {
- write_logfile_entry(
- "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
- }
-
- # Initialize the $self array reference.
- # To add an item, first add a constant index in the BEGIN block above.
- my $self = [];
-
- # Basic data structures...
- $self->[_rlines_] = []; # = ref to array of lines of the file
- $self->[_rlines_new_] = []; # = ref to array of output lines
- # (FOR FUTURE DEVELOPMENT)
- $self->[_rLL_] = []; # = ref to array with all tokens
- # in the file. LL originally meant
- # 'Linked List'. Linked lists were a
- # bad idea but LL is easy to type.
- $self->[_Klimit_] = undef; # = maximum K index for rLL. This is
- # needed to catch any autovivification
- # problems.
- $self->[_K_opening_container_] = {}; # for quickly traversing structure
- $self->[_K_closing_container_] = {}; # for quickly traversing structure
- $self->[_K_opening_ternary_] = {}; # for quickly traversing structure
- $self->[_K_closing_ternary_] = {}; # for quickly traversing structure
- $self->[_rK_phantom_semicolons_] =
- undef; # for undoing phantom semicolons if iterating
- $self->[_rtype_count_by_seqno_] = {};
- $self->[_ris_broken_container_] = {};
- $self->[_rhas_broken_container_] = {};
- $self->[_rparent_of_seqno_] = {};
- $self->[_rchildren_of_seqno_] = {};
- $self->[_rpaired_to_inner_container_] = {};
- $self->[_rbreak_container_] = {}; # prevent one-line blocks
- $self->[_rshort_nested_] = {}; # blocks not forced open
- $self->[_length_function_] = $length_function;
-
- # Some objects...
- $self->[_fh_tee_] = $fh_tee;
- $self->[_sink_object_] = $sink_object;
- $self->[_file_writer_object_] = $file_writer_object;
- $self->[_vertical_aligner_object_] = $vertical_aligner_object;
-
- # Reference to the batch being processed
- $self->[_this_batch_] = [];
-
- # Memory of processed text...
- $self->[_last_last_line_leading_level_] = 0;
- $self->[_last_line_leading_level_] = 0;
- $self->[_last_line_leading_type_] = '#';
- $self->[_last_output_short_opening_token_] = 0;
- $self->[_added_semicolon_count_] = 0;
- $self->[_first_added_semicolon_at_] = 0;
- $self->[_last_added_semicolon_at_] = 0;
- $self->[_deleted_semicolon_count_] = 0;
- $self->[_first_deleted_semicolon_at_] = 0;
- $self->[_last_deleted_semicolon_at_] = 0;
- $self->[_embedded_tab_count_] = 0;
- $self->[_first_embedded_tab_at_] = 0;
- $self->[_last_embedded_tab_at_] = 0;
- $self->[_first_tabbing_disagreement_] = 0;
- $self->[_last_tabbing_disagreement_] = 0;
- $self->[_tabbing_disagreement_count_] = 0;
- $self->[_in_tabbing_disagreement_] = 0;
- $self->[_saw_VERSION_in_this_file_] = !$rOpts->{'pass-version-line'};
- $self->[_saw_END_or_DATA_] = 0;
-
- # Hashes related to container welding...
- $self->[_rnested_pairs_] = [];
- $self->[_radjusted_levels_] = [];
- $self->[_rweld_len_left_closing_] = {};
- $self->[_rweld_len_right_closing_] = {};
- $self->[_rweld_len_left_opening_] = {};
- $self->[_rweld_len_right_opening_] = {};
-
- $self->[_rspecial_side_comment_type_] = {};
-
- bless $self, $class;
-
- # Safety check..this is not a class yet
- if ( _increment_count() > 1 ) {
- confess
-"Attempt to create more than 1 object in $class, which is not a true class yet\n";
- }
- return $self;
-}
-
-sub Fault {
- my ($msg) = @_;
-
- # This routine is called for errors that really should not occur
- # except if there has been a bug introduced by a recent program change
- my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
- my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
- my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
- my $input_stream_name = get_input_stream_name();
-
- Die(<<EOM);
-==============================================================================
-While operating on input stream with name: '$input_stream_name'
-A fault was detected at line $line0 of sub '$subroutine1'
-in file '$filename1'
-which was called from line $line1 of sub '$subroutine2'
-Message: '$msg'
-This is probably an error introduced by a recent programming change.
-==============================================================================
-EOM
-
- # We shouldn't get here, but this return is to keep Perl-Critic from
- # complaining.
- return;
-}
-
-sub check_token_array {
- my $self = shift;
+sub check_token_array {
+ my $self = shift;
# Check for errors in the array of tokens
my $rLL = $self->[_rLL_];
return;
}
-sub set_rLL_max_index {
+sub want_blank_line {
my $self = shift;
-
- # Set the limit of the rLL array, assuming that it is correct.
- # This should only be called by routines after they make changes
- # to tokenization
- my $rLL = $self->[_rLL_];
- if ( !defined($rLL) ) {
-
- # Shouldn't happen because rLL was initialized to be an array ref
- Fault("Undefined Memory rLL");
- }
- my $Klimit_old = $self->[_Klimit_];
- my $num = @{$rLL};
- my $Klimit;
- if ( $num > 0 ) { $Klimit = $num - 1 }
- $self->[_Klimit_] = $Klimit;
- return ($Klimit);
+ $self->flush();
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->want_blank_line();
+ return;
}
-sub get_rLL_max_index {
- my $self = shift;
-
- # the memory location $rLL and number of tokens should be obtained
- # from this routine so that any autovivication can be immediately caught.
- my $rLL = $self->[_rLL_];
- my $Klimit = $self->[_Klimit_];
- if ( !defined($rLL) ) {
-
- # Shouldn't happen because rLL was initialized to be an array ref
- Fault("Undefined Memory rLL");
- }
- my $num = @{$rLL};
- if ( $num == 0 && defined($Klimit)
- || $num > 0 && !defined($Klimit)
- || $num > 0 && $Klimit != $num - 1 )
- {
-
- # Possible autovivification problem...
- if ( !defined($Klimit) ) { $Klimit = '*' }
- Fault("Error getting rLL: Memory items=$num and Klimit=$Klimit");
- }
- return ($Klimit);
+sub write_unindented_line {
+ my ( $self, $line ) = @_;
+ $self->flush();
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->write_line($line);
+ return;
}
-sub prepare_for_next_batch {
- initialize_forced_breakpoint_vars();
- initialize_gnu_batch_vars();
- initialize_batch_variables();
- return;
+sub consecutive_nonblank_lines {
+ my ($self) = @_;
+ my $file_writer_object = $self->[_file_writer_object_];
+ my $vao = $self->[_vertical_aligner_object_];
+ return $file_writer_object->get_consecutive_nonblank_lines() +
+ $vao->get_cached_line_count();
}
-sub keyword_group_scan {
- my $self = shift;
+sub trim {
- # Called once per file to process the --keyword-group-blanks-* parameters.
+ # trim leading and trailing whitespace from a string
+ my $str = shift;
+ $str =~ s/\s+$//;
+ $str =~ s/^\s+//;
+ return $str;
+}
- # Manipulate blank lines around keyword groups (kgb* flags)
- # Scan all lines looking for runs of consecutive lines beginning with
- # selected keywords. Example keywords are 'my', 'our', 'local', ... but
- # they may be anything. We will set flags requesting that blanks be
- # inserted around and within them according to input parameters. Note
- # that we are scanning the lines as they came in in the input stream, so
- # they are not necessarily well formatted.
+sub max {
+ my (@vals) = @_;
+ my $max = shift @vals;
+ for (@vals) { $max = $_ > $max ? $_ : $max }
+ return $max;
+}
- # The output of this sub is a return hash ref whose keys are the indexes of
- # lines after which we desire a blank line. For line index i:
- # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
- # $rhash_of_desires->{$i} = 2 means we want blank line $i removed
- my $rhash_of_desires = {};
+sub min {
+ my (@vals) = @_;
+ my $min = shift @vals;
+ for (@vals) { $min = $_ < $min ? $_ : $min }
+ return $min;
+}
- my $Opt_blanks_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb'
- my $Opt_blanks_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba'
- my $Opt_blanks_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi'
- my $Opt_blanks_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd'
- my $Opt_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs'
+sub split_words {
- # A range of sizes can be input with decimal notation like 'min.max' with
- # any number of dots between the two numbers. Examples:
- # string => min max matches
- # 1.1 1 1 exactly 1
- # 1.3 1 3 1,2, or 3
- # 1..3 1 3 1,2, or 3
- # 5 5 - 5 or more
- # 6. 6 - 6 or more
- # .2 - 2 up to 2
- # 1.0 1 0 nothing
- my ( $Opt_size_min, $Opt_size_max ) = split /\.+/, $Opt_size;
- if ( $Opt_size_min && $Opt_size_min !~ /^\d+$/
- || $Opt_size_max && $Opt_size_max !~ /^\d+$/ )
- {
- Warn(<<EOM);
-Unexpected value for -kgbs: '$Opt_size'; expecting 'min' or 'min.max';
-ignoring all -kgb flags
-EOM
+ # given a string containing words separated by whitespace,
+ # return the list of words
+ my ($str) = @_;
+ return unless $str;
+ $str =~ s/\s+$//;
+ $str =~ s/^\s+//;
+ return split( /\s+/, $str );
+}
- # Turn this option off so that this message does not keep repeating
- # during iterations and other files.
- $rOpts->{'keyword-group-blanks-size'} = "";
- return $rhash_of_desires;
- }
- $Opt_size_min = 1 unless ($Opt_size_min);
+###########################################
+# CODE SECTION 3: Check and process options
+###########################################
- if ( $Opt_size_max && $Opt_size_max < $Opt_size_min ) {
- return $rhash_of_desires;
- }
+sub check_options {
- # codes for $Opt_blanks_before and $Opt_blanks_after:
- # 0 = never (delete if exist)
- # 1 = stable (keep unchanged)
- # 2 = always (insert if missing)
+ # This routine is called to check the user-supplied run parameters
+ # and to configure the control hashes to them.
+ $rOpts = shift;
- return $rhash_of_desires
- unless $Opt_size_min > 0
- && ( $Opt_blanks_before != 1
- || $Opt_blanks_after != 1
- || $Opt_blanks_inside
- || $Opt_blanks_delete );
+ initialize_whitespace_hashes();
+ initialize_bond_strength_hashes();
- my $Opt_pattern = $keyword_group_list_pattern;
- my $Opt_comment_pattern = $keyword_group_list_comment_pattern;
- my $Opt_repeat_count =
- $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr'
+ # Make needed regex patterns for matching text.
+ # NOTE: sub_matching_patterns must be made first because later patterns use
+ # them; see RT #133130.
+ make_sub_matching_pattern();
+ make_static_block_comment_pattern();
+ make_static_side_comment_pattern();
+ make_closing_side_comment_prefix();
+ make_closing_side_comment_list_pattern();
+ $format_skipping_pattern_begin =
+ make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
+ $format_skipping_pattern_end =
+ make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
+ make_non_indenting_brace_pattern();
- my $rlines = $self->[_rlines_];
- my $rLL = $self->[_rLL_];
- my $K_closing_container = $self->[_K_closing_container_];
+ # If closing side comments ARE selected, then we can safely
+ # delete old closing side comments unless closing side comment
+ # warnings are requested. This is a good idea because it will
+ # eliminate any old csc's which fall below the line count threshold.
+ # We cannot do this if warnings are turned on, though, because we
+ # might delete some text which has been added. So that must
+ # be handled when comments are created.
+ if ( $rOpts->{'closing-side-comments'} ) {
+ if ( !$rOpts->{'closing-side-comment-warnings'} ) {
+ $rOpts->{'delete-closing-side-comments'} = 1;
+ }
+ }
- # variables for the current group and subgroups:
- my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group,
- @subgroup );
+ # If closing side comments ARE NOT selected, but warnings ARE
+ # selected and we ARE DELETING csc's, then we will pretend to be
+ # adding with a huge interval. This will force the comments to be
+ # generated for comparison with the old comments, but not added.
+ elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
+ if ( $rOpts->{'delete-closing-side-comments'} ) {
+ $rOpts->{'delete-closing-side-comments'} = 0;
+ $rOpts->{'closing-side-comments'} = 1;
+ $rOpts->{'closing-side-comment-interval'} = 100000000;
+ }
+ }
- # Definitions:
- # ($ibeg, $iend) = starting and ending line indexes of this entire group
- # $count = total number of keywords seen in this entire group
- # $level_beg = indententation level of this group
- # @group = [ $i, $token, $count ] =list of all keywords & blanks
- # @subgroup = $j, index of group where token changes
- # @iblanks = line indexes of blank lines in input stream in this group
- # where i=starting line index
- # token (the keyword)
- # count = number of this token in this subgroup
- # j = index in group where token changes
- #
- # These vars will contain values for the most recently seen line:
- my ( $line_type, $CODE_type, $K_first, $K_last );
+ make_bli_pattern();
+ make_block_brace_vertical_tightness_pattern();
+ make_blank_line_pattern();
+ make_keyword_group_list_pattern();
- my $number_of_groups_seen = 0;
+ # Make initial list of desired one line block types
+ # They will be modified by 'prepare_cuddled_block_types'
+ %want_one_line_block = %is_sort_map_grep_eval;
- ####################
- # helper subroutines
- ####################
+ prepare_cuddled_block_types();
+ if ( $rOpts->{'dump-cuddled-block-list'} ) {
+ dump_cuddled_block_list(*STDOUT);
+ Exit(0);
+ }
- my $insert_blank_after = sub {
- my ($i) = @_;
- $rhash_of_desires->{$i} = 1;
- my $ip = $i + 1;
- if ( defined( $rhash_of_desires->{$ip} )
- && $rhash_of_desires->{$ip} == 2 )
+ if ( $rOpts->{'line-up-parentheses'} ) {
+
+ if ( $rOpts->{'indent-only'}
+ || !$rOpts->{'add-newlines'}
+ || !$rOpts->{'delete-old-newlines'} )
{
- $rhash_of_desires->{$ip} = 0;
+ Warn(<<EOM);
+-----------------------------------------------------------------------
+Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
+
+The -lp indentation logic requires that perltidy be able to coordinate
+arbitrarily large numbers of line breakpoints. This isn't possible
+with these flags.
+-----------------------------------------------------------------------
+EOM
+ $rOpts->{'line-up-parentheses'} = 0;
}
- return;
- };
- my $split_into_sub_groups = sub {
+ if ( $rOpts->{'whitespace-cycle'} ) {
+ Warn(<<EOM);
+Conflict: -wc cannot currently be used with the -lp option; ignoring -wc
+EOM
+ $rOpts->{'whitespace-cycle'} = 0;
+ }
+ }
- # place blanks around long sub-groups of keywords
- # ...if requested
- return unless ($Opt_blanks_inside);
+ # At present, tabs are not compatible with the line-up-parentheses style
+ # (it would be possible to entab the total leading whitespace
+ # just prior to writing the line, if desired).
+ if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
+ Warn(<<EOM);
+Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
+EOM
+ $rOpts->{'tabs'} = 0;
+ }
- # loop over sub-groups, index k
- push @subgroup, scalar @group;
- my $kbeg = 1;
- my $kend = @subgroup - 1;
- for ( my $k = $kbeg ; $k <= $kend ; $k++ ) {
+ # Likewise, tabs are not compatible with outdenting..
+ if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
+ Warn(<<EOM);
+Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
+EOM
+ $rOpts->{'tabs'} = 0;
+ }
- # index j runs through all keywords found
- my $j_b = $subgroup[ $k - 1 ];
- my $j_e = $subgroup[$k] - 1;
+ if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
+ Warn(<<EOM);
+Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
+EOM
+ $rOpts->{'tabs'} = 0;
+ }
- # index i is the actual line number of a keyword
- my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
- my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
- my $num = $count_e - $count_b + 1;
+ if ( !$rOpts->{'space-for-semicolon'} ) {
+ $want_left_space{'f'} = -1;
+ }
- # This subgroup runs from line $ib to line $ie-1, but may contain
- # blank lines
- if ( $num >= $Opt_size_min ) {
+ if ( $rOpts->{'space-terminal-semicolon'} ) {
+ $want_left_space{';'} = 1;
+ }
- # if there are blank lines, we require that at least $num lines
- # be non-blank up to the boundary with the next subgroup.
- my $nog_b = my $nog_e = 1;
- if ( @iblanks && !$Opt_blanks_delete ) {
- my $j_bb = $j_b + $num - 1;
- my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
- $nog_b = $count_bb - $count_b + 1 == $num;
+ # implement outdenting preferences for keywords
+ %outdent_keyword = ();
+ my @okw = split_words( $rOpts->{'outdent-keyword-list'} );
+ unless (@okw) {
+ @okw = qw(next last redo goto return); # defaults
+ }
- my $j_ee = $j_e - ( $num - 1 );
- my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
- $nog_e = $count_e - $count_ee + 1 == $num;
- }
- if ( $nog_b && $k > $kbeg ) {
- $insert_blank_after->( $i_b - 1 );
- }
- if ( $nog_e && $k < $kend ) {
- my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] };
- $insert_blank_after->( $i_ep - 1 );
- }
- }
+ # FUTURE: if not a keyword, assume that it is an identifier
+ foreach (@okw) {
+ if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
+ $outdent_keyword{$_} = 1;
}
- };
-
- my $delete_if_blank = sub {
- my ($i) = @_;
-
- # delete line $i if it is blank
- return unless ( $i >= 0 && $i < @{$rlines} );
- my $line_type = $rlines->[$i]->{_line_type};
- return if ( $line_type ne 'CODE' );
- my $code_type = $rlines->[$i]->{_code_type};
- if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
- return;
- };
-
- my $delete_inner_blank_lines = sub {
-
- # always remove unwanted trailing blank lines from our list
- return unless (@iblanks);
- while ( my $ibl = pop(@iblanks) ) {
- if ( $ibl < $iend ) { push @iblanks, $ibl; last }
- $iend = $ibl;
+ else {
+ Warn("ignoring '$_' in -okwl list; not a perl keyword");
}
+ }
- # now mark mark interior blank lines for deletion if requested
- return unless ($Opt_blanks_delete);
-
- while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
-
- };
+ # setup hash for -kpit option
+ %keyword_paren_inner_tightness = ();
+ my $kpit_value = $rOpts->{'keyword-paren-inner-tightness'};
+ if ( defined($kpit_value) && $kpit_value != 1 ) {
+ my @kpit =
+ split_words( $rOpts->{'keyword-paren-inner-tightness-list'} );
+ unless (@kpit) {
+ @kpit = qw(if elsif unless while until for foreach); # defaults
+ }
- my $end_group = sub {
+ # we will allow keywords and user-defined identifiers
+ foreach (@kpit) {
+ $keyword_paren_inner_tightness{$_} = $kpit_value;
+ }
+ }
- # end a group of keywords
- my ($bad_ending) = @_;
- if ( defined($ibeg) && $ibeg >= 0 ) {
+ # implement user whitespace preferences
+ if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
+ @want_left_space{@q} = (1) x scalar(@q);
+ }
- # then handle sufficiently large groups
- if ( $count >= $Opt_size_min ) {
+ if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
+ @want_right_space{@q} = (1) x scalar(@q);
+ }
- $number_of_groups_seen++;
+ if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
+ @want_left_space{@q} = (-1) x scalar(@q);
+ }
- # do any blank deletions regardless of the count
- $delete_inner_blank_lines->();
+ if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
+ @want_right_space{@q} = (-1) x scalar(@q);
+ }
+ if ( $rOpts->{'dump-want-left-space'} ) {
+ dump_want_left_space(*STDOUT);
+ Exit(0);
+ }
- if ( $ibeg > 0 ) {
- my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
+ if ( $rOpts->{'dump-want-right-space'} ) {
+ dump_want_right_space(*STDOUT);
+ Exit(0);
+ }
- # patch for hash bang line which is not currently marked as
- # a comment; mark it as a comment
- if ( $ibeg == 1 && !$code_type ) {
- my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
- $code_type = 'BC'
- if ( $line_text && $line_text =~ /^#/ );
- }
+ # default keywords for which space is introduced before an opening paren
+ # (at present, including them messes up vertical alignment)
+ my @sak = qw(my local our and or xor err eq ne if else elsif until
+ unless while for foreach return switch case given when catch);
+ %space_after_keyword = map { $_ => 1 } @sak;
- # Do not insert a blank after a comment
- # (this could be subject to a flag in the future)
- if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
- if ( $Opt_blanks_before == INSERT ) {
- $insert_blank_after->( $ibeg - 1 );
+ # first remove any or all of these if desired
+ if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
- }
- elsif ( $Opt_blanks_before == DELETE ) {
- $delete_if_blank->( $ibeg - 1 );
- }
- }
- }
+ # -nsak='*' selects all the above keywords
+ if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
+ @space_after_keyword{@q} = (0) x scalar(@q);
+ }
- # We will only put blanks before code lines. We could loosen
- # this rule a little, but we have to be very careful because
- # for example we certainly don't want to drop a blank line
- # after a line like this:
- # my $var = <<EOM;
- if ( $line_type eq 'CODE' && defined($K_first) ) {
+ # then allow user to add to these defaults
+ if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
+ @space_after_keyword{@q} = (1) x scalar(@q);
+ }
- # - Do not put a blank before a line of different level
- # - Do not put a blank line if we ended the search badly
- # - Do not put a blank at the end of the file
- # - Do not put a blank line before a hanging side comment
- my $level = $rLL->[$K_first]->[_LEVEL_];
- my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
+ # implement user break preferences
+ my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
+ = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
+ . : ? && || and or err xor
+ );
- if ( $level == $level_beg
- && $ci_level == 0
- && !$bad_ending
- && $iend < @{$rlines}
- && $CODE_type ne 'HSC' )
- {
- if ( $Opt_blanks_after == INSERT ) {
- $insert_blank_after->($iend);
- }
- elsif ( $Opt_blanks_after == DELETE ) {
- $delete_if_blank->( $iend + 1 );
- }
- }
- }
+ my $break_after = sub {
+ my @toks = @_;
+ foreach my $tok (@toks) {
+ if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
+ my $lbs = $left_bond_strength{$tok};
+ my $rbs = $right_bond_strength{$tok};
+ if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
+ ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
+ ( $lbs, $rbs );
}
- $split_into_sub_groups->();
}
-
- # reset for another group
- $ibeg = -1;
- $iend = undef;
- $level_beg = -1;
- $K_closing = undef;
- @group = ();
- @subgroup = ();
- @iblanks = ();
};
- my $find_container_end = sub {
-
- # If the keyword lines ends with an open token, find the closing token
- # '$K_closing' so that we can easily skip past the contents of the
- # container.
- return if ( $K_last <= $K_first );
- my $KK = $K_last;
- my $type_last = $rLL->[$KK]->[_TYPE_];
- my $tok_last = $rLL->[$KK]->[_TOKEN_];
- if ( $type_last eq '#' ) {
- $KK = $self->K_previous_nonblank($KK);
- $tok_last = $rLL->[$KK]->[_TOKEN_];
- }
- if ( $KK > $K_first && $tok_last =~ /^[\(\{\[]$/ ) {
-
- my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
- my $lev = $rLL->[$KK]->[_LEVEL_];
- if ( $lev == $level_beg ) {
- $K_closing = $K_closing_container->{$type_sequence};
+ my $break_before = sub {
+ my @toks = @_;
+ foreach my $tok (@toks) {
+ my $lbs = $left_bond_strength{$tok};
+ my $rbs = $right_bond_strength{$tok};
+ if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
+ ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
+ ( $lbs, $rbs );
}
}
};
- my $add_to_group = sub {
- my ( $i, $token, $level ) = @_;
-
- # End the previous group if we have reached the maximum
- # group size
- if ( $Opt_size_max && @group >= $Opt_size_max ) {
- $end_group->();
- }
-
- if ( @group == 0 ) {
- $ibeg = $i;
- $level_beg = $level;
- $count = 0;
- }
+ $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
+ $break_before->(@all_operators)
+ if ( $rOpts->{'break-before-all-operators'} );
- $count++;
- $iend = $i;
+ $break_after->( split_words( $rOpts->{'want-break-after'} ) );
+ $break_before->( split_words( $rOpts->{'want-break-before'} ) );
- # New sub-group?
- if ( !@group || $token ne $group[-1]->[1] ) {
- push @subgroup, scalar(@group);
- }
- push @group, [ $i, $token, $count ];
+ # make note if breaks are before certain key types
+ %want_break_before = ();
+ foreach my $tok ( @all_operators, ',' ) {
+ $want_break_before{$tok} =
+ $left_bond_strength{$tok} < $right_bond_strength{$tok};
+ }
- # remember if this line ends in an open container
- $find_container_end->();
+ # Coordinate ?/: breaks, which must be similar
+ if ( !$want_break_before{':'} ) {
+ $want_break_before{'?'} = $want_break_before{':'};
+ $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
+ $left_bond_strength{'?'} = NO_BREAK;
+ }
- return;
- };
+ # Only make a hash entry for the next parameters if values are defined.
+ # That allows a quick check to be made later.
+ %break_before_container_types = ();
+ for ( $rOpts->{'break-before-hash-brace'} ) {
+ $break_before_container_types{'{'} = $_ if $_ && $_ > 0;
+ }
+ for ( $rOpts->{'break-before-square-bracket'} ) {
+ $break_before_container_types{'['} = $_ if $_ && $_ > 0;
+ }
+ for ( $rOpts->{'break-before-paren'} ) {
+ $break_before_container_types{'('} = $_ if $_ && $_ > 0;
+ }
- ###################################
- # loop over all lines of the source
- ###################################
- $end_group->();
- my $i = -1;
- foreach my $line_of_tokens ( @{$rlines} ) {
-
- $i++;
- last
- if ( $Opt_repeat_count > 0
- && $number_of_groups_seen >= $Opt_repeat_count );
+ %container_indentation_options = ();
+ for ( $rOpts->{'break-before-hash-brace-and-indent'} ) {
+ my $tok = '{';
+ if ( defined($_) && $_ > 0 && $break_before_container_types{$tok} ) {
+ $container_indentation_options{$tok} = $_;
+ }
+ }
+ for ( $rOpts->{'break-before-square-bracket-and-indent'} ) {
+ my $tok = '[';
+ if ( defined($_) && $_ > 0 && $break_before_container_types{$tok} ) {
+ $container_indentation_options{$tok} = $_;
+ }
+ }
+ for ( $rOpts->{'break-before-paren-and-indent'} ) {
+ my $tok = '(';
+ if ( defined($_) && $_ > 0 && $break_before_container_types{$tok} ) {
+ $container_indentation_options{$tok} = $_;
+ }
+ }
- $CODE_type = "";
- $K_first = undef;
- $K_last = undef;
- $line_type = $line_of_tokens->{_line_type};
+ # Define here tokens which may follow the closing brace of a do statement
+ # on the same line, as in:
+ # } while ( $something);
+ my @dof = qw(until while unless if ; : );
+ push @dof, ',';
+ @is_do_follower{@dof} = (1) x scalar(@dof);
- # always end a group at non-CODE
- if ( $line_type ne 'CODE' ) { $end_group->(); next }
+ # What tokens may follow the closing brace of an if or elsif block?
+ # Not used. Previously used for cuddled else, but no longer needed.
+ %is_if_brace_follower = ();
- $CODE_type = $line_of_tokens->{_code_type};
+ # nothing can follow the closing curly of an else { } block:
+ %is_else_brace_follower = ();
- # end any group at a format skipping line
- if ( $CODE_type && $CODE_type eq 'FS' ) {
- $end_group->();
- next;
- }
+ # what can follow a multi-line anonymous sub definition closing curly:
+ my @asf = qw# ; : => or and && || ~~ !~~ ) #;
+ push @asf, ',';
+ @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
- # continue in a verbatim (VB) type; it may be quoted text
- if ( $CODE_type eq 'VB' ) {
- if ( $ibeg >= 0 ) { $iend = $i; }
- next;
- }
+ # what can follow a one-line anonymous sub closing curly:
+ # one-line anonymous subs also have ']' here...
+ # see tk3.t and PP.pm
+ my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
+ push @asf1, ',';
+ @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
- # and continue in blank (BL) types
- if ( $CODE_type eq 'BL' ) {
- if ( $ibeg >= 0 ) {
- $iend = $i;
- push @{iblanks}, $i;
+ # What can follow a closing curly of a block
+ # which is not an if/elsif/else/do/sort/map/grep/eval/sub
+ # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
+ my @obf = qw# ; : => or and && || ) #;
+ push @obf, ',';
+ @is_other_brace_follower{@obf} = (1) x scalar(@obf);
- # propagate current subgroup token
- my $tok = $group[-1]->[1];
- push @group, [ $i, $tok, $count ];
- }
- next;
- }
+ $right_bond_strength{'{'} = WEAK;
+ $left_bond_strength{'{'} = VERY_STRONG;
- # examine the first token of this line
- my $rK_range = $line_of_tokens->{_rK_range};
- ( $K_first, $K_last ) = @{$rK_range};
- if ( !defined($K_first) ) {
+ # make -l=0 equal to -l=infinite
+ if ( !$rOpts->{'maximum-line-length'} ) {
+ $rOpts->{'maximum-line-length'} = 1000000;
+ }
- # Somewhat unexpected blank line..
- # $rK_range is normally defined for line type CODE, but this can
- # happen for example if the input line was a single semicolon which
- # is being deleted. In that case there was code in the input
- # file but it is not being retained. So we can silently return.
- return $rhash_of_desires;
- }
+ # make -lbl=0 equal to -lbl=infinite
+ if ( !$rOpts->{'long-block-line-count'} ) {
+ $rOpts->{'long-block-line-count'} = 1000000;
+ }
- my $level = $rLL->[$K_first]->[_LEVEL_];
- my $type = $rLL->[$K_first]->[_TYPE_];
- my $token = $rLL->[$K_first]->[_TOKEN_];
- my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
+ my $ole = $rOpts->{'output-line-ending'};
+ if ($ole) {
+ my %endings = (
+ dos => "\015\012",
+ win => "\015\012",
+ mac => "\015",
+ unix => "\012",
+ );
- # see if this is a code type we seek (i.e. comment)
- if ( $CODE_type
- && $Opt_comment_pattern
- && $CODE_type =~ /$Opt_comment_pattern/ )
- {
+ # Patch for RT #99514, a memoization issue.
+ # Normally, the user enters one of 'dos', 'win', etc, and we change the
+ # value in the options parameter to be the corresponding line ending
+ # character. But, if we are using memoization, on later passes through
+ # here the option parameter will already have the desired ending
+ # character rather than the keyword 'dos', 'win', etc. So
+ # we must check to see if conversion has already been done and, if so,
+ # bypass the conversion step.
+ my %endings_inverted = (
+ "\015\012" => 'dos',
+ "\015\012" => 'win',
+ "\015" => 'mac',
+ "\012" => 'unix',
+ );
- my $tok = $CODE_type;
+ if ( defined( $endings_inverted{$ole} ) ) {
- # Continuing a group
- if ( $ibeg >= 0 && $level == $level_beg ) {
- $add_to_group->( $i, $tok, $level );
+ # we already have valid line ending, nothing more to do
+ }
+ else {
+ $ole = lc $ole;
+ unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
+ my $str = join " ", keys %endings;
+ Die(<<EOM);
+Unrecognized line ending '$ole'; expecting one of: $str
+EOM
}
-
- # Start new group
- else {
-
- # first end old group if any; we might be starting new
- # keywords at different level
- if ( $ibeg > 0 ) { $end_group->(); }
- $add_to_group->( $i, $tok, $level );
+ if ( $rOpts->{'preserve-line-endings'} ) {
+ Warn("Ignoring -ple; conflicts with -ole\n");
+ $rOpts->{'preserve-line-endings'} = undef;
}
- next;
}
+ }
- # See if it is a keyword we seek, but never start a group in a
- # continuation line; the code may be badly formatted.
- if ( $ci_level == 0
- && $type eq 'k'
- && $token =~ /$Opt_pattern/ )
- {
-
- # Continuing a keyword group
- if ( $ibeg >= 0 && $level == $level_beg ) {
- $add_to_group->( $i, $token, $level );
- }
-
- # Start new keyword group
- else {
+ # hashes used to simplify setting whitespace
+ %tightness = (
+ '{' => $rOpts->{'brace-tightness'},
+ '}' => $rOpts->{'brace-tightness'},
+ '(' => $rOpts->{'paren-tightness'},
+ ')' => $rOpts->{'paren-tightness'},
+ '[' => $rOpts->{'square-bracket-tightness'},
+ ']' => $rOpts->{'square-bracket-tightness'},
+ );
+ %matching_token = (
+ '{' => '}',
+ '(' => ')',
+ '[' => ']',
+ '?' => ':',
+ );
- # first end old group if any; we might be starting new
- # keywords at different level
- if ( $ibeg > 0 ) { $end_group->(); }
- $add_to_group->( $i, $token, $level );
- }
- next;
+ if ( $rOpts->{'ignore-old-breakpoints'} ) {
+ if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
+ Warn("Conflicting parameters: -iob and -bom; -bom will be ignored\n"
+ );
+ }
+ if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
+ Warn("Conflicting parameters: -iob and -boc; -boc will be ignored\n"
+ );
+ }
+ if ( $rOpts->{'break-at-old-semicolon-breakpoints'} ) {
+ Warn("Conflicting parameters: -iob and -bos; -bos will be ignored\n"
+ );
}
- # This is not one of our keywords, but we are in a keyword group
- # so see if we should continue or quit
- elsif ( $ibeg >= 0 ) {
+ # Note: there are additional parameters that can be made inactive by
+ # -iob, but they are on by default so we would generate excessive
+ # warnings if we noted them. They are:
+ # $rOpts->{'break-at-old-keyword-breakpoints'}
+ # $rOpts->{'break-at-old-logical-breakpoints'}
+ # $rOpts->{'break-at-old-ternary-breakpoints'}
+ # $rOpts->{'break-at-old-attribute-breakpoints'}
+ }
- # - bail out on a large level change; we may have walked into a
- # data structure or anoymous sub code.
- if ( $level > $level_beg + 1 || $level < $level_beg ) {
- $end_group->();
- next;
- }
+ # very frequently used parameters made global for efficiency
+ $rOpts_closing_side_comment_maximum_text =
+ $rOpts->{'closing-side-comment-maximum-text'};
+ $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
+ $rOpts_indent_columns = $rOpts->{'indent-columns'};
+ $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
+ $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
+ $rOpts_variable_maximum_line_length =
+ $rOpts->{'variable-maximum-line-length'};
- # - keep going on a continuation line of the same level, since
- # it is probably a continuation of our previous keyword,
- # - and keep going past hanging side comments because we never
- # want to interrupt them.
- if ( ( ( $level == $level_beg ) && $ci_level > 0 )
- || $CODE_type eq 'HSC' )
- {
- $iend = $i;
- next;
- }
+ # Note that both opening and closing tokens can access the opening
+ # and closing flags of their container types.
+ %opening_vertical_tightness = (
+ '(' => $rOpts->{'paren-vertical-tightness'},
+ '{' => $rOpts->{'brace-vertical-tightness'},
+ '[' => $rOpts->{'square-bracket-vertical-tightness'},
+ ')' => $rOpts->{'paren-vertical-tightness'},
+ '}' => $rOpts->{'brace-vertical-tightness'},
+ ']' => $rOpts->{'square-bracket-vertical-tightness'},
+ );
- # - continue if if we are within in a container which started with
- # the line of the previous keyword.
- if ( defined($K_closing) && $K_first <= $K_closing ) {
-
- # continue if entire line is within container
- if ( $K_last <= $K_closing ) { $iend = $i; next }
+ %closing_vertical_tightness = (
+ '(' => $rOpts->{'paren-vertical-tightness-closing'},
+ '{' => $rOpts->{'brace-vertical-tightness-closing'},
+ '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
+ ')' => $rOpts->{'paren-vertical-tightness-closing'},
+ '}' => $rOpts->{'brace-vertical-tightness-closing'},
+ ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
+ );
- # continue at ); or }; or ];
- my $KK = $K_closing + 1;
- if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
- if ( $KK < $K_last ) {
- if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
- if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) {
- $end_group->(1);
- next;
- }
- }
- $iend = $i;
- next;
- }
+ # assume flag for '>' same as ')' for closing qw quotes
+ %closing_token_indentation = (
+ ')' => $rOpts->{'closing-paren-indentation'},
+ '}' => $rOpts->{'closing-brace-indentation'},
+ ']' => $rOpts->{'closing-square-bracket-indentation'},
+ '>' => $rOpts->{'closing-paren-indentation'},
+ );
- $end_group->(1);
- next;
- }
+ # flag indicating if any closing tokens are indented
+ $some_closing_token_indentation =
+ $rOpts->{'closing-paren-indentation'}
+ || $rOpts->{'closing-brace-indentation'}
+ || $rOpts->{'closing-square-bracket-indentation'}
+ || $rOpts->{'indent-closing-brace'};
- # - end the group if none of the above
- $end_group->();
- next;
- }
+ %opening_token_right = (
+ '(' => $rOpts->{'opening-paren-right'},
+ '{' => $rOpts->{'opening-hash-brace-right'},
+ '[' => $rOpts->{'opening-square-bracket-right'},
+ );
- # not in a keyword group; continue
- else { next }
- }
+ %stack_opening_token = (
+ '(' => $rOpts->{'stack-opening-paren'},
+ '{' => $rOpts->{'stack-opening-hash-brace'},
+ '[' => $rOpts->{'stack-opening-square-bracket'},
+ );
- # end of loop over all lines
- $end_group->();
- return $rhash_of_desires;
+ %stack_closing_token = (
+ ')' => $rOpts->{'stack-closing-paren'},
+ '}' => $rOpts->{'stack-closing-hash-brace'},
+ ']' => $rOpts->{'stack-closing-square-bracket'},
+ );
+ return;
+}
-} ## end sub keyword_group_scan
+sub initialize_whitespace_hashes {
-sub process_all_lines {
+ # This is called once before formatting begins to initialize these global
+ # hashes, which control the use of whitespace around tokens:
+ #
+ # %binary_ws_rules
+ # %want_left_space
+ # %want_right_space
+ # %space_after_keyword
+ #
+ # Many token types are identical to the tokens themselves.
+ # See the tokenizer for a complete list. Here are some special types:
+ # k = perl keyword
+ # f = semicolon in for statement
+ # m = unary minus
+ # p = unary plus
+ # Note that :: is excluded since it should be contained in an identifier
+ # Note that '->' is excluded because it never gets space
+ # parentheses and brackets are excluded since they are handled specially
+ # curly braces are included but may be overridden by logic, such as
+ # newline logic.
- # Main loop over all lines of a file.
- # Lines are processed according to type.
+ # NEW_TOKENS: create a whitespace rule here. This can be as
+ # simple as adding your new letter to @spaces_both_sides, for
+ # example.
- my $self = shift;
- my $rlines = $self->[_rlines_];
- my $sink_object = $self->[_sink_object_];
- my $fh_tee = $self->[_fh_tee_];
- my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
- my $file_writer_object = $self->[_file_writer_object_];
+ my @opening_type = qw< L { ( [ >;
+ @is_opening_type{@opening_type} = (1) x scalar(@opening_type);
- # Note for RT#118553, leave only one newline at the end of a file.
- # Example code to do this is in comments below:
- # my $Opt_trim_ending_blank_lines = 0;
- # if ($Opt_trim_ending_blank_lines) {
- # while ( my $line_of_tokens = pop @{$rlines} ) {
- # my $line_type = $line_of_tokens->{_line_type};
- # if ( $line_type eq 'CODE' ) {
- # my $CODE_type = $line_of_tokens->{_code_type};
- # next if ( $CODE_type eq 'BL' );
- # }
- # push @{$rlines}, $line_of_tokens;
- # last;
- # }
- # }
+ my @closing_type = qw< R } ) ] >;
+ @is_closing_type{@closing_type} = (1) x scalar(@closing_type);
- # But while this would be a trivial update, it would have very undesirable
- # side effects when perltidy is run from within an editor on a small snippet.
- # So this is best done with a separate filter, such
- # as 'delete_ending_blank_lines.pl' in the examples folder.
+ my @spaces_both_sides = qw#
+ + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
+ .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
+ &&= ||= //= <=> A k f w F n C Y U G v
+ #;
- # Flag to prevent blank lines when POD occurs in a format skipping sect.
- my $in_format_skipping_section;
+ my @spaces_left_side = qw<
+ t ! ~ m p { \ h pp mm Z j
+ >;
+ push( @spaces_left_side, '#' ); # avoids warning message
- # set locations for blanks around long runs of keywords
- my $rwant_blank_line_after = $self->keyword_group_scan();
+ my @spaces_right_side = qw<
+ ; } ) ] R J ++ -- **=
+ >;
+ push( @spaces_right_side, ',' ); # avoids warning message
- my $line_type = "";
- my $i = -1;
- foreach my $line_of_tokens ( @{$rlines} ) {
- $i++;
+ %want_left_space = ();
+ %want_right_space = ();
+ %binary_ws_rules = ();
- # insert blank lines requested for keyword sequences
- if ( $i > 0
- && defined( $rwant_blank_line_after->{ $i - 1 } )
- && $rwant_blank_line_after->{ $i - 1 } == 1 )
- {
- $self->want_blank_line();
- }
+ # Note that we setting defaults here. Later in processing
+ # the values of %want_left_space and %want_right_space
+ # may be overridden by any user settings specified by the
+ # -wls and -wrs parameters. However the binary_whitespace_rules
+ # are hardwired and have priority.
+ @want_left_space{@spaces_both_sides} =
+ (1) x scalar(@spaces_both_sides);
+ @want_right_space{@spaces_both_sides} =
+ (1) x scalar(@spaces_both_sides);
+ @want_left_space{@spaces_left_side} =
+ (1) x scalar(@spaces_left_side);
+ @want_right_space{@spaces_left_side} =
+ (-1) x scalar(@spaces_left_side);
+ @want_left_space{@spaces_right_side} =
+ (-1) x scalar(@spaces_right_side);
+ @want_right_space{@spaces_right_side} =
+ (1) x scalar(@spaces_right_side);
+ $want_left_space{'->'} = WS_NO;
+ $want_right_space{'->'} = WS_NO;
+ $want_left_space{'**'} = WS_NO;
+ $want_right_space{'**'} = WS_NO;
+ $want_right_space{'CORE::'} = WS_NO;
- my $last_line_type = $line_type;
- $line_type = $line_of_tokens->{_line_type};
- my $input_line = $line_of_tokens->{_line_text};
+ # These binary_ws_rules are hardwired and have priority over the above
+ # settings. It would be nice to allow adjustment by the user,
+ # but it would be complicated to specify.
+ #
+ # hash type information must stay tightly bound
+ # as in : ${xxxx}
+ $binary_ws_rules{'i'}{'L'} = WS_NO;
+ $binary_ws_rules{'i'}{'{'} = WS_YES;
+ $binary_ws_rules{'k'}{'{'} = WS_YES;
+ $binary_ws_rules{'U'}{'{'} = WS_YES;
+ $binary_ws_rules{'i'}{'['} = WS_NO;
+ $binary_ws_rules{'R'}{'L'} = WS_NO;
+ $binary_ws_rules{'R'}{'{'} = WS_NO;
+ $binary_ws_rules{'t'}{'L'} = WS_NO;
+ $binary_ws_rules{'t'}{'{'} = WS_NO;
+ $binary_ws_rules{'}'}{'L'} = WS_NO;
+ $binary_ws_rules{'}'}{'{'} = WS_OPTIONAL; # RT#129850; was WS_NO
+ $binary_ws_rules{'$'}{'L'} = WS_NO;
+ $binary_ws_rules{'$'}{'{'} = WS_NO;
+ $binary_ws_rules{'@'}{'L'} = WS_NO;
+ $binary_ws_rules{'@'}{'{'} = WS_NO;
+ $binary_ws_rules{'='}{'L'} = WS_YES;
+ $binary_ws_rules{'J'}{'J'} = WS_YES;
- # _line_type codes are:
- # SYSTEM - system-specific code before hash-bang line
- # CODE - line of perl code (including comments)
- # POD_START - line starting pod, such as '=head'
- # POD - pod documentation text
- # POD_END - last line of pod section, '=cut'
- # HERE - text of here-document
- # HERE_END - last line of here-doc (target word)
- # FORMAT - format section
- # FORMAT_END - last line of format section, '.'
- # DATA_START - __DATA__ line
- # DATA - unidentified text following __DATA__
- # END_START - __END__ line
- # END - unidentified text following __END__
- # ERROR - we are in big trouble, probably not a perl script
+ # the following includes ') {'
+ # as in : if ( xxx ) { yyy }
+ $binary_ws_rules{']'}{'L'} = WS_NO;
+ $binary_ws_rules{']'}{'{'} = WS_NO;
+ $binary_ws_rules{')'}{'{'} = WS_YES;
+ $binary_ws_rules{')'}{'['} = WS_NO;
+ $binary_ws_rules{']'}{'['} = WS_NO;
+ $binary_ws_rules{']'}{'{'} = WS_NO;
+ $binary_ws_rules{'}'}{'['} = WS_NO;
+ $binary_ws_rules{'R'}{'['} = WS_NO;
- # put a blank line after an =cut which comes before __END__ and __DATA__
- # (required by podchecker)
- if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) {
- $file_writer_object->reset_consecutive_blank_lines();
- if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
- $self->want_blank_line();
- }
- }
+ $binary_ws_rules{']'}{'++'} = WS_NO;
+ $binary_ws_rules{']'}{'--'} = WS_NO;
+ $binary_ws_rules{')'}{'++'} = WS_NO;
+ $binary_ws_rules{')'}{'--'} = WS_NO;
- # handle line of code..
- if ( $line_type eq 'CODE' ) {
+ $binary_ws_rules{'R'}{'++'} = WS_NO;
+ $binary_ws_rules{'R'}{'--'} = WS_NO;
- my $CODE_type = $line_of_tokens->{_code_type};
- $in_format_skipping_section = $CODE_type eq 'FS';
+ $binary_ws_rules{'i'}{'Q'} = WS_YES;
+ $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
- # Handle blank lines
- if ( $CODE_type eq 'BL' ) {
+ # FIXME: we could to split 'i' into variables and functions
+ # and have no space for functions but space for variables. For now,
+ # I have a special patch in the special rules below
+ $binary_ws_rules{'i'}{'('} = WS_NO;
- # If keep-old-blank-lines is zero, we delete all
- # old blank lines and let the blank line rules generate any
- # needed blanks.
+ $binary_ws_rules{'w'}{'('} = WS_NO;
+ $binary_ws_rules{'w'}{'{'} = WS_YES;
+ return;
- # and delete lines requested by the keyword-group logic
- my $kgb_keep = !( defined( $rwant_blank_line_after->{$i} )
- && $rwant_blank_line_after->{$i} == 2 );
+} ## end initialize_whitespace_hashes
- # But: the keep-old-blank-lines flag has priority over kgb flags
- $kgb_keep = 1 if ( $rOpts_keep_old_blank_lines == 2 );
+sub set_whitespace_flags {
- if ( $rOpts_keep_old_blank_lines && $kgb_keep ) {
- $self->flush($CODE_type);
- $file_writer_object->write_blank_code_line(
- $rOpts_keep_old_blank_lines == 2 );
- $self->[_last_line_leading_type_] = 'b';
- }
- next;
- }
- else {
+ # This routine is called once per file to set whitespace flags for that
+ # file. This routine examines each pair of nonblank tokens and sets a flag
+ # indicating if white space is needed.
+ #
+ # $rwhitespace_flags->[$j] is a flag indicating whether a white space
+ # BEFORE token $j is needed, with the following values:
+ #
+ # WS_NO = -1 do not want a space BEFORE token $j
+ # WS_OPTIONAL= 0 optional space or $j is a whitespace
+ # WS_YES = 1 want a space BEFORE token $j
+ #
- # let logger see all non-blank lines of code
- my $output_line_number = $self->get_output_line_number();
- black_box( $line_of_tokens, $output_line_number );
- }
+ my $self = shift;
+ my $rLL = $self->[_rLL_];
+ my $DEBUG_WHITE;
- # Handle Format Skipping (FS) and Verbatim (VB) Lines
- if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
- $self->write_unindented_line("$input_line");
- $file_writer_object->reset_consecutive_blank_lines();
- next;
- }
+ my $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
+ my $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
+ my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
+ my $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
- # Handle all other lines of code
- $self->process_line_of_CODE($line_of_tokens);
- }
+ my $rwhitespace_flags = [];
- # handle line of non-code..
- else {
+ my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 );
- # set special flags
- my $skip_line = 0;
- if ( $line_type =~ /^POD/ ) {
+ my ( $token, $type, $block_type, $seqno, $input_line_no );
+ my (
+ $last_token, $last_type, $last_block_type,
+ $last_seqno, $last_input_line_no
+ );
- # Pod docs should have a preceding blank line. But stay
- # out of __END__ and __DATA__ sections, because
- # the user may be using this section for any purpose whatsoever
- if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
- if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
- if ( !$skip_line
- && !$in_format_skipping_section
- && $line_type eq 'POD_START'
- && !$self->[_saw_END_or_DATA_] )
- {
- $self->want_blank_line();
- }
- if ( $rOpts->{'tee-pod'} ) {
- $fh_tee->print($input_line) if ($fh_tee);
- }
- }
+ my $j_tight_closing_paren = -1;
- # leave the blank counters in a predictable state
- # after __END__ or __DATA__
- elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
- $file_writer_object->reset_consecutive_blank_lines();
- $self->[_saw_END_or_DATA_] = 1;
- }
+ $token = ' ';
+ $type = 'b';
+ $block_type = '';
+ $seqno = '';
+ $input_line_no = 0;
+ $last_token = ' ';
+ $last_type = 'b';
+ $last_block_type = '';
+ $last_seqno = '';
+ $last_input_line_no = 0;
- # write unindented non-code line
- if ( !$skip_line ) {
- $self->write_unindented_line($input_line);
- }
- }
- }
- return;
+ my $jmax = @{$rLL} - 1;
-} ## end sub process_all_lines
+ my ($ws);
-{ ## begin closure check_line_hashes
+ # This is some logic moved to a sub to avoid deep nesting of if stmts
+ my $ws_in_container = sub {
- # This code checks that no autovivification occurs in the 'line' hash
+ my ($j) = @_;
+ my $ws = WS_YES;
+ if ( $j + 1 > $jmax ) { return (WS_NO) }
- my %valid_line_hash;
+ # Patch to count '-foo' as single token so that
+ # each of $a{-foo} and $a{foo} and $a{'foo'} do
+ # not get spaces with default formatting.
+ my $j_here = $j;
+ ++$j_here
+ if ( $token eq '-'
+ && $last_token eq '{'
+ && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
- BEGIN {
+ # $j_next is where a closing token should be if
+ # the container has a single token
+ if ( $j_here + 1 > $jmax ) { return (WS_NO) }
+ my $j_next =
+ ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
+ ? $j_here + 2
+ : $j_here + 1;
- # These keys are defined for each line in the formatter
- # Each line must have exactly these quantities
- my @valid_line_keys = qw(
- _curly_brace_depth
- _ending_in_quote
- _guessed_indentation_level
- _line_number
- _line_text
- _line_type
- _paren_depth
- _quote_character
- _rK_range
- _square_bracket_depth
- _starting_in_quote
- _ended_in_blank_token
- _code_type
+ if ( $j_next > $jmax ) { return WS_NO }
+ my $tok_next = $rLL->[$j_next]->[_TOKEN_];
+ my $type_next = $rLL->[$j_next]->[_TYPE_];
- _ci_level_0
- _level_0
- _nesting_blocks_0
- _nesting_tokens_0
- );
+ # for tightness = 1, if there is just one token
+ # within the matching pair, we will keep it tight
+ if (
+ $tok_next eq $matching_token{$last_token}
- @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
- }
+ # but watch out for this: [ [ ] (misc.t)
+ && $last_token ne $token
- sub check_line_hashes {
- my $self = shift;
- my $rlines = $self->[_rlines_];
- foreach my $rline ( @{$rlines} ) {
- my $iline = $rline->{_line_number};
- my $line_type = $rline->{_line_type};
- check_keys( $rline, \%valid_line_hash,
- "Checkpoint: line number =$iline, line_type=$line_type", 1 );
- }
- return;
- }
-} ## end closure check_line_hashes
+ # double diamond is usually spaced
+ && $token ne '<<>>'
-sub write_line {
+ )
+ {
- # This routine originally received lines of code and immediately processed
- # them. That was efficient when memory was limited, but now it just saves
- # the lines it receives. They get processed all together after the last
- # line is received.
+ # remember where to put the space for the closing paren
+ $j_tight_closing_paren = $j_next;
+ return (WS_NO);
+ }
+ return (WS_YES);
+ };
- # As tokenized lines are received they are converted to the format needed
- # for the final formatting.
- my ( $self, $line_of_tokens_old ) = @_;
- my $rLL = $self->[_rLL_];
- my $Klimit = $self->[_Klimit_];
- my $rlines_new = $self->[_rlines_];
+ # Local hashes to set spaces around container tokens according to their
+ # sequence numbers. These are set as keywords are examined.
+ # They are controlled by the -kpit and -kpitl flags.
+ my %opening_container_inside_ws;
+ my %closing_container_inside_ws;
+ my $set_container_ws_by_keyword = sub {
- my $Kfirst;
- my $line_of_tokens = {};
- foreach my $key (
- qw(
- _curly_brace_depth
- _ending_in_quote
- _guessed_indentation_level
- _line_number
- _line_text
- _line_type
- _paren_depth
- _quote_character
- _square_bracket_depth
- _starting_in_quote
- )
- )
- {
- $line_of_tokens->{$key} = $line_of_tokens_old->{$key};
- }
+ return unless (%keyword_paren_inner_tightness);
- # Data needed by Logger
- $line_of_tokens->{_level_0} = 0;
- $line_of_tokens->{_ci_level_0} = 0;
- $line_of_tokens->{_nesting_blocks_0} = "";
- $line_of_tokens->{_nesting_tokens_0} = "";
+ my ( $word, $sequence_number ) = @_;
- # Needed to avoid trimming quotes
- $line_of_tokens->{_ended_in_blank_token} = undef;
+ # We just saw a keyword (or other function name) followed by an opening
+ # paren. Now check to see if the following paren should have special
+ # treatment for its inside space. If so we set a hash value using the
+ # sequence number as key.
+ if ( $word && $sequence_number ) {
+ my $tightness = $keyword_paren_inner_tightness{$word};
+ if ( defined($tightness) && $tightness != 1 ) {
+ my $ws_flag = $tightness == 0 ? WS_YES : WS_NO;
+ $opening_container_inside_ws{$sequence_number} = $ws_flag;
+ $closing_container_inside_ws{$sequence_number} = $ws_flag;
+ }
+ }
+ };
- my $line_type = $line_of_tokens_old->{_line_type};
- my $input_line_no = $line_of_tokens_old->{_line_number} - 1;
- if ( $line_type eq 'CODE' ) {
+ my $ws_opening_container_override = sub {
+ my ( $ws, $sequence_number ) = @_;
+ return $ws unless (%opening_container_inside_ws);
+ if ($sequence_number) {
+ my $ws_override = $opening_container_inside_ws{$sequence_number};
+ if ($ws_override) { $ws = $ws_override }
+ }
+ return $ws;
+ };
- my $rtokens = $line_of_tokens_old->{_rtokens};
- my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
- my $rblock_type = $line_of_tokens_old->{_rblock_type};
- my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
- my $rcontainer_environment =
- $line_of_tokens_old->{_rcontainer_environment};
- my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
- my $rlevels = $line_of_tokens_old->{_rlevels};
- my $rslevels = $line_of_tokens_old->{_rslevels};
- my $rci_levels = $line_of_tokens_old->{_rci_levels};
- my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
- my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
+ my $ws_closing_container_override = sub {
+ my ( $ws, $sequence_number ) = @_;
+ return $ws unless (%closing_container_inside_ws);
+ if ($sequence_number) {
+ my $ws_override = $closing_container_inside_ws{$sequence_number};
+ if ($ws_override) { $ws = $ws_override }
+ }
+ return $ws;
+ };
- my $jmax = @{$rtokens} - 1;
- if ( $jmax >= 0 ) {
- $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
- foreach my $j ( 0 .. $jmax ) {
+ # main loop over all tokens to define the whitespace flags
+ for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
- # Clip negative nesting depths to zero to avoid problems.
- # Negative values can occur in files with unbalanced containers
- my $slevel = $rslevels->[$j];
- if ( $slevel < 0 ) { $slevel = 0 }
+ my $rtokh = $rLL->[$j];
- # But do not clip the 'level' variable yet. We will do this later,
- # in sub 'store_token_to_go'. The reason is that in files with
- # level errors, the logic in 'weld_cuddled_else' uses a stack
- # logic that will give bad welds if we clip levels here.
- ## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 }
+ # Set a default
+ $rwhitespace_flags->[$j] = WS_OPTIONAL;
- my @tokary;
- @tokary[
- _TOKEN_, _TYPE_,
- _BLOCK_TYPE_, _CONTAINER_TYPE_,
- _CONTAINER_ENVIRONMENT_, _TYPE_SEQUENCE_,
- _LEVEL_, _LEVEL_TRUE_,
- _SLEVEL_, _CI_LEVEL_,
- _LINE_INDEX_,
- ]
- = (
- $rtokens->[$j], $rtoken_type->[$j],
- $rblock_type->[$j], $rcontainer_type->[$j],
- $rcontainer_environment->[$j], $rtype_sequence->[$j],
- $rlevels->[$j], $rlevels->[$j],
- $slevel, $rci_levels->[$j],
- $input_line_no,
- );
- push @{$rLL}, \@tokary;
- }
+ if ( $rtokh->[_TYPE_] eq 'b' ) {
+ next;
+ }
- $Klimit = @{$rLL} - 1;
+ # set a default value, to be changed as needed
+ $ws = undef;
+ $last_token = $token;
+ $last_type = $type;
+ $last_block_type = $block_type;
+ $last_seqno = $seqno;
+ $last_input_line_no = $input_line_no;
+ $token = $rtokh->[_TOKEN_];
+ $type = $rtokh->[_TYPE_];
+ $block_type = $rtokh->[_BLOCK_TYPE_];
+ $seqno = $rtokh->[_TYPE_SEQUENCE_];
+ $input_line_no = $rtokh->[_LINE_INDEX_];
- # Need to remember if we can trim the input line
- $line_of_tokens->{_ended_in_blank_token} =
- $rtoken_type->[$jmax] eq 'b';
+ #---------------------------------------------------------------
+ # Whitespace Rules Section 1:
+ # Handle space on the inside of opening braces.
+ #---------------------------------------------------------------
- $line_of_tokens->{_level_0} = $rlevels->[0];
- $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
- $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
- $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
- }
- }
+ # /^[L\{\(\[]$/
+ if ( $is_opening_type{$last_type} ) {
- $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
- $line_of_tokens->{_code_type} = "";
- $self->[_Klimit_] = $Klimit;
+ $j_tight_closing_paren = -1;
- push @{$rlines_new}, $line_of_tokens;
- return;
-}
+ # let us keep empty matched braces together: () {} []
+ # except for BLOCKS
+ if ( $token eq $matching_token{$last_token} ) {
+ if ($block_type) {
+ $ws = WS_YES;
+ }
+ else {
+ $ws = WS_NO;
+ }
+ }
+ else {
-sub initialize_whitespace_hashes {
+ # we're considering the right of an opening brace
+ # tightness = 0 means always pad inside with space
+ # tightness = 1 means pad inside if "complex"
+ # tightness = 2 means never pad inside with space
- # This is called once before formatting begins to initialize these global
- # hashes, which control the use of whitespace around tokens:
- #
- # %binary_ws_rules
- # %want_left_space
- # %want_right_space
- # %space_after_keyword
- #
- # Many token types are identical to the tokens themselves.
- # See the tokenizer for a complete list. Here are some special types:
- # k = perl keyword
- # f = semicolon in for statement
- # m = unary minus
- # p = unary plus
- # Note that :: is excluded since it should be contained in an identifier
- # Note that '->' is excluded because it never gets space
- # parentheses and brackets are excluded since they are handled specially
- # curly braces are included but may be overridden by logic, such as
- # newline logic.
+ my $tightness;
+ if ( $last_type eq '{'
+ && $last_token eq '{'
+ && $last_block_type )
+ {
+ $tightness = $rOpts_block_brace_tightness;
+ }
+ else { $tightness = $tightness{$last_token} }
- # NEW_TOKENS: create a whitespace rule here. This can be as
- # simple as adding your new letter to @spaces_both_sides, for
- # example.
+ #=============================================================
+ # Patch for test problem <<snippets/fabrice_bug.in>>
+ # We must always avoid spaces around a bare word beginning
+ # with ^ as in:
+ # my $before = ${^PREMATCH};
+ # Because all of the following cause an error in perl:
+ # my $before = ${ ^PREMATCH };
+ # my $before = ${ ^PREMATCH};
+ # my $before = ${^PREMATCH };
+ # So if brace tightness flag is -bt=0 we must temporarily reset
+ # to bt=1. Note that here we must set tightness=1 and not 2 so
+ # that the closing space
+ # is also avoided (via the $j_tight_closing_paren flag in coding)
+ if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
- my @opening_type = qw< L { ( [ >;
- @is_opening_type{@opening_type} = (1) x scalar(@opening_type);
+ #=============================================================
- my @closing_type = qw< R } ) ] >;
- @is_closing_type{@closing_type} = (1) x scalar(@closing_type);
+ if ( $tightness <= 0 ) {
+ $ws = WS_YES;
+ }
+ elsif ( $tightness > 1 ) {
+ $ws = WS_NO;
+ }
+ else {
+ $ws = $ws_in_container->($j);
+ }
+ }
- my @spaces_both_sides = qw#
- + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
- .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
- &&= ||= //= <=> A k f w F n C Y U G v
- #;
+ # check for special cases which override the above rules
+ $ws = $ws_opening_container_override->( $ws, $last_seqno );
- my @spaces_left_side = qw<
- t ! ~ m p { \ h pp mm Z j
- >;
- push( @spaces_left_side, '#' ); # avoids warning message
+ } # end setting space flag inside opening tokens
+ my $ws_1;
+ $ws_1 = $ws
+ if $DEBUG_WHITE;
- my @spaces_right_side = qw<
- ; } ) ] R J ++ -- **=
- >;
- push( @spaces_right_side, ',' ); # avoids warning message
+ #---------------------------------------------------------------
+ # Whitespace Rules Section 2:
+ # Handle space on inside of closing brace pairs.
+ #---------------------------------------------------------------
- %want_left_space = ();
- %want_right_space = ();
- %binary_ws_rules = ();
+ # /[\}\)\]R]/
+ if ( $is_closing_type{$type} ) {
- # Note that we setting defaults here. Later in processing
- # the values of %want_left_space and %want_right_space
- # may be overridden by any user settings specified by the
- # -wls and -wrs parameters. However the binary_whitespace_rules
- # are hardwired and have priority.
- @want_left_space{@spaces_both_sides} =
- (1) x scalar(@spaces_both_sides);
- @want_right_space{@spaces_both_sides} =
- (1) x scalar(@spaces_both_sides);
- @want_left_space{@spaces_left_side} =
- (1) x scalar(@spaces_left_side);
- @want_right_space{@spaces_left_side} =
- (-1) x scalar(@spaces_left_side);
- @want_left_space{@spaces_right_side} =
- (-1) x scalar(@spaces_right_side);
- @want_right_space{@spaces_right_side} =
- (1) x scalar(@spaces_right_side);
- $want_left_space{'->'} = WS_NO;
- $want_right_space{'->'} = WS_NO;
- $want_left_space{'**'} = WS_NO;
- $want_right_space{'**'} = WS_NO;
- $want_right_space{'CORE::'} = WS_NO;
+ if ( $j == $j_tight_closing_paren ) {
- # These binary_ws_rules are hardwired and have priority over the above
- # settings. It would be nice to allow adjustment by the user,
- # but it would be complicated to specify.
- #
- # hash type information must stay tightly bound
- # as in : ${xxxx}
- $binary_ws_rules{'i'}{'L'} = WS_NO;
- $binary_ws_rules{'i'}{'{'} = WS_YES;
- $binary_ws_rules{'k'}{'{'} = WS_YES;
- $binary_ws_rules{'U'}{'{'} = WS_YES;
- $binary_ws_rules{'i'}{'['} = WS_NO;
- $binary_ws_rules{'R'}{'L'} = WS_NO;
- $binary_ws_rules{'R'}{'{'} = WS_NO;
- $binary_ws_rules{'t'}{'L'} = WS_NO;
- $binary_ws_rules{'t'}{'{'} = WS_NO;
- $binary_ws_rules{'}'}{'L'} = WS_NO;
- $binary_ws_rules{'}'}{'{'} = WS_OPTIONAL; # RT#129850; was WS_NO
- $binary_ws_rules{'$'}{'L'} = WS_NO;
- $binary_ws_rules{'$'}{'{'} = WS_NO;
- $binary_ws_rules{'@'}{'L'} = WS_NO;
- $binary_ws_rules{'@'}{'{'} = WS_NO;
- $binary_ws_rules{'='}{'L'} = WS_YES;
- $binary_ws_rules{'J'}{'J'} = WS_YES;
+ $j_tight_closing_paren = -1;
+ $ws = WS_NO;
+ }
+ else {
- # the following includes ') {'
- # as in : if ( xxx ) { yyy }
- $binary_ws_rules{']'}{'L'} = WS_NO;
- $binary_ws_rules{']'}{'{'} = WS_NO;
- $binary_ws_rules{')'}{'{'} = WS_YES;
- $binary_ws_rules{')'}{'['} = WS_NO;
- $binary_ws_rules{']'}{'['} = WS_NO;
- $binary_ws_rules{']'}{'{'} = WS_NO;
- $binary_ws_rules{'}'}{'['} = WS_NO;
- $binary_ws_rules{'R'}{'['} = WS_NO;
-
- $binary_ws_rules{']'}{'++'} = WS_NO;
- $binary_ws_rules{']'}{'--'} = WS_NO;
- $binary_ws_rules{')'}{'++'} = WS_NO;
- $binary_ws_rules{')'}{'--'} = WS_NO;
+ if ( !defined($ws) ) {
- $binary_ws_rules{'R'}{'++'} = WS_NO;
- $binary_ws_rules{'R'}{'--'} = WS_NO;
+ my $tightness;
+ if ( $type eq '}' && $token eq '}' && $block_type ) {
+ $tightness = $rOpts_block_brace_tightness;
+ }
+ else { $tightness = $tightness{$token} }
- $binary_ws_rules{'i'}{'Q'} = WS_YES;
- $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
+ $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
+ }
+ }
- # FIXME: we could to split 'i' into variables and functions
- # and have no space for functions but space for variables. For now,
- # I have a special patch in the special rules below
- $binary_ws_rules{'i'}{'('} = WS_NO;
+ # check for special cases which override the above rules
+ $ws = $ws_closing_container_override->( $ws, $seqno );
- $binary_ws_rules{'w'}{'('} = WS_NO;
- $binary_ws_rules{'w'}{'{'} = WS_YES;
- return;
+ } # end setting space flag inside closing tokens
-} ## end initialize_whitespace_hashes
+ my $ws_2;
+ $ws_2 = $ws
+ if $DEBUG_WHITE;
-sub set_whitespace_flags {
+ #---------------------------------------------------------------
+ # Whitespace Rules Section 3:
+ # Use the binary rule table.
+ #---------------------------------------------------------------
+ if ( !defined($ws) ) {
+ $ws = $binary_ws_rules{$last_type}{$type};
+ }
+ my $ws_3;
+ $ws_3 = $ws
+ if $DEBUG_WHITE;
- # This routine is called once per file to set whitespace flags for that
- # file. This routine examines each pair of nonblank tokens and sets a flag
- # indicating if white space is needed.
- #
- # $rwhitespace_flags->[$j] is a flag indicating whether a white space
- # BEFORE token $j is needed, with the following values:
- #
- # WS_NO = -1 do not want a space BEFORE token $j
- # WS_OPTIONAL= 0 optional space or $j is a whitespace
- # WS_YES = 1 want a space BEFORE token $j
- #
+ #---------------------------------------------------------------
+ # Whitespace Rules Section 4:
+ # Handle some special cases.
+ #---------------------------------------------------------------
+ if ( $token eq '(' ) {
- my $self = shift;
- my $rLL = $self->[_rLL_];
- my $DEBUG_WHITE;
+ # This will have to be tweaked as tokenization changes.
+ # We usually want a space at '} (', for example:
+ # <<snippets/space1.in>>
+ # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
+ #
+ # But not others:
+ # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
+ # At present, the above & block is marked as type L/R so this case
+ # won't go through here.
+ if ( $last_type eq '}' ) { $ws = WS_YES }
- my $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
- my $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
- my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
- my $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
+ # NOTE: some older versions of Perl had occasional problems if
+ # spaces are introduced between keywords or functions and opening
+ # parens. So the default is not to do this except is certain
+ # cases. The current Perl seems to tolerate spaces.
- my $rwhitespace_flags = [];
+ # Space between keyword and '('
+ elsif ( $last_type eq 'k' ) {
+ $ws = WS_NO
+ unless ( $rOpts_space_keyword_paren
+ || $space_after_keyword{$last_token} );
- my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 );
+ # Set inside space flag if requested
+ $set_container_ws_by_keyword->( $last_token, $seqno );
+ }
- my ( $token, $type, $block_type, $seqno, $input_line_no );
- my (
- $last_token, $last_type, $last_block_type,
- $last_seqno, $last_input_line_no
- );
+ # Space between function and '('
+ # -----------------------------------------------------
+ # 'w' and 'i' checks for something like:
+ # myfun( &myfun( ->myfun(
+ # -----------------------------------------------------
+ elsif (( $last_type =~ /^[wUG]$/ )
+ || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
+ {
+ $ws = WS_NO unless ($rOpts_space_function_paren);
+ $set_container_ws_by_keyword->( $last_token, $seqno );
+ }
- my $j_tight_closing_paren = -1;
+ # space between something like $i and ( in <<snippets/space2.in>>
+ # for $i ( 0 .. 20 ) {
+ # FIXME: eventually, type 'i' needs to be split into multiple
+ # token types so this can be a hardwired rule.
+ elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
+ $ws = WS_YES;
+ }
- $token = ' ';
- $type = 'b';
- $block_type = '';
- $seqno = '';
- $input_line_no = 0;
- $last_token = ' ';
- $last_type = 'b';
- $last_block_type = '';
- $last_seqno = '';
- $last_input_line_no = 0;
+ # allow constant function followed by '()' to retain no space
+ elsif ($last_type eq 'C'
+ && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
+ {
+ $ws = WS_NO;
+ }
+ }
- my $jmax = @{$rLL} - 1;
+ # patch for SWITCH/CASE: make space at ']{' optional
+ # since the '{' might begin a case or when block
+ elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
+ $ws = WS_OPTIONAL;
+ }
- my ($ws);
+ # keep space between 'sub' and '{' for anonymous sub definition
+ if ( $type eq '{' ) {
+ if ( $last_token eq 'sub' ) {
+ $ws = WS_YES;
+ }
- # This is some logic moved to a sub to avoid deep nesting of if stmts
- my $ws_in_container = sub {
+ # this is needed to avoid no space in '){'
+ if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
- my ($j) = @_;
- my $ws = WS_YES;
- if ( $j + 1 > $jmax ) { return (WS_NO) }
+ # avoid any space before the brace or bracket in something like
+ # @opts{'a','b',...}
+ if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
+ $ws = WS_NO;
+ }
+ }
- # Patch to count '-foo' as single token so that
- # each of $a{-foo} and $a{foo} and $a{'foo'} do
- # not get spaces with default formatting.
- my $j_here = $j;
- ++$j_here
- if ( $token eq '-'
- && $last_token eq '{'
- && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
+ elsif ( $type eq 'i' ) {
- # $j_next is where a closing token should be if
- # the container has a single token
- if ( $j_here + 1 > $jmax ) { return (WS_NO) }
- my $j_next =
- ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
- ? $j_here + 2
- : $j_here + 1;
+ # never a space before ->
+ if ( $token =~ /^\-\>/ ) {
+ $ws = WS_NO;
+ }
+ }
- if ( $j_next > $jmax ) { return WS_NO }
- my $tok_next = $rLL->[$j_next]->[_TOKEN_];
- my $type_next = $rLL->[$j_next]->[_TYPE_];
+ # retain any space between '-' and bare word
+ elsif ( $type eq 'w' || $type eq 'C' ) {
+ $ws = WS_OPTIONAL if $last_type eq '-';
- # for tightness = 1, if there is just one token
- # within the matching pair, we will keep it tight
- if (
- $tok_next eq $matching_token{$last_token}
+ # never a space before ->
+ if ( $token =~ /^\-\>/ ) {
+ $ws = WS_NO;
+ }
+ }
- # but watch out for this: [ [ ] (misc.t)
- && $last_token ne $token
+ # retain any space between '-' and bare word; for example
+ # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
+ # $myhash{USER-NAME}='steve';
+ elsif ( $type eq 'm' || $type eq '-' ) {
+ $ws = WS_OPTIONAL if ( $last_type eq 'w' );
+ }
- # double diamond is usually spaced
- && $token ne '<<>>'
+ # always space before side comment
+ elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
+ # always preserver whatever space was used after a possible
+ # filehandle (except _) or here doc operator
+ if (
+ $type ne '#'
+ && ( ( $last_type eq 'Z' && $last_token ne '_' )
+ || $last_type eq 'h' )
)
{
-
- # remember where to put the space for the closing paren
- $j_tight_closing_paren = $j_next;
- return (WS_NO);
+ $ws = WS_OPTIONAL;
}
- return (WS_YES);
- };
-
- # Local hashes to set spaces around container tokens according to their
- # sequence numbers. These are set as keywords are examined.
- # They are controlled by the -kpit and -kpitl flags.
- my %opening_container_inside_ws;
- my %closing_container_inside_ws;
- my $set_container_ws_by_keyword = sub {
-
- return unless (%keyword_paren_inner_tightness);
- my ( $word, $sequence_number ) = @_;
-
- # We just saw a keyword (or other function name) followed by an opening
- # paren. Now check to see if the following paren should have special
- # treatment for its inside space. If so we set a hash value using the
- # sequence number as key.
- if ( $word && $sequence_number ) {
- my $tightness = $keyword_paren_inner_tightness{$word};
- if ( defined($tightness) && $tightness != 1 ) {
- my $ws_flag = $tightness == 0 ? WS_YES : WS_NO;
- $opening_container_inside_ws{$sequence_number} = $ws_flag;
- $closing_container_inside_ws{$sequence_number} = $ws_flag;
+ # space_backslash_quote; RT #123774 <<snippets/rt123774.in>>
+ # allow a space between a backslash and single or double quote
+ # to avoid fooling html formatters
+ elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ ) {
+ if ($rOpts_space_backslash_quote) {
+ if ( $rOpts_space_backslash_quote == 1 ) {
+ $ws = WS_OPTIONAL;
+ }
+ elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
+ else { } # shouldnt happen
+ }
+ else {
+ $ws = WS_NO;
}
}
- };
+ elsif ( $type eq 'k' ) {
- my $ws_opening_container_override = sub {
- my ( $ws, $sequence_number ) = @_;
- return $ws unless (%opening_container_inside_ws);
- if ($sequence_number) {
- my $ws_override = $opening_container_inside_ws{$sequence_number};
- if ($ws_override) { $ws = $ws_override }
- }
- return $ws;
- };
-
- my $ws_closing_container_override = sub {
- my ( $ws, $sequence_number ) = @_;
- return $ws unless (%closing_container_inside_ws);
- if ($sequence_number) {
- my $ws_override = $closing_container_inside_ws{$sequence_number};
- if ($ws_override) { $ws = $ws_override }
+ # Keywords 'for', 'foreach' are special cases for -kpit since the
+ # opening paren does not always immediately follow the keyword. So
+ # we have to search forward for the paren in this case. I have
+ # limited the search to 10 tokens ahead, just in case somebody
+ # has a big file and no opening paren. This should be enough for
+ # all normal code.
+ if ( $is_for_foreach{$token}
+ && %keyword_paren_inner_tightness
+ && defined( $keyword_paren_inner_tightness{$token} )
+ && $j < $jmax )
+ {
+ my $jp = $j;
+ for ( my $inc = 1 ; $inc < 10 ; $inc++ ) {
+ $jp++;
+ last if ( $jp > $jmax );
+ next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
+ my $seqno = $rLL->[$jp]->[_TYPE_SEQUENCE_];
+ $set_container_ws_by_keyword->( $token, $seqno );
+ last;
+ }
+ }
}
- return $ws;
- };
- # main loop over all tokens to define the whitespace flags
- for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
+ my $ws_4;
+ $ws_4 = $ws
+ if $DEBUG_WHITE;
- my $rtokh = $rLL->[$j];
+ #---------------------------------------------------------------
+ # Whitespace Rules Section 5:
+ # Apply default rules not covered above.
+ #---------------------------------------------------------------
- # Set a default
- $rwhitespace_flags->[$j] = WS_OPTIONAL;
+ # If we fall through to here, look at the pre-defined hash tables for
+ # the two tokens, and:
+ # if (they are equal) use the common value
+ # if (either is zero or undef) use the other
+ # if (either is -1) use it
+ # That is,
+ # left vs right
+ # 1 vs 1 --> 1
+ # 0 vs 0 --> 0
+ # -1 vs -1 --> -1
+ #
+ # 0 vs -1 --> -1
+ # 0 vs 1 --> 1
+ # 1 vs 0 --> 1
+ # -1 vs 0 --> -1
+ #
+ # -1 vs 1 --> -1
+ # 1 vs -1 --> -1
+ if ( !defined($ws) ) {
+ my $wl = $want_left_space{$type};
+ my $wr = $want_right_space{$last_type};
+ if ( !defined($wl) ) { $wl = 0 }
+ if ( !defined($wr) ) { $wr = 0 }
+ $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
+ }
- if ( $rtokh->[_TYPE_] eq 'b' ) {
- next;
+ if ( !defined($ws) ) {
+ $ws = 0;
+ write_diagnostics(
+ "WS flag is undefined for tokens $last_token $token\n");
}
- # set a default value, to be changed as needed
- $ws = undef;
- $last_token = $token;
- $last_type = $type;
- $last_block_type = $block_type;
- $last_seqno = $seqno;
- $last_input_line_no = $input_line_no;
- $token = $rtokh->[_TOKEN_];
- $type = $rtokh->[_TYPE_];
- $block_type = $rtokh->[_BLOCK_TYPE_];
- $seqno = $rtokh->[_TYPE_SEQUENCE_];
- $input_line_no = $rtokh->[_LINE_INDEX_];
+ # Treat newline as a whitespace. Otherwise, we might combine
+ # 'Send' and '-recipients' here according to the above rules:
+ # <<snippets/space3.in>>
+ # my $msg = new Fax::Send
+ # -recipients => $to,
+ # -data => $data;
+ if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 }
- #---------------------------------------------------------------
- # Whitespace Rules Section 1:
- # Handle space on the inside of opening braces.
- #---------------------------------------------------------------
+ if ( ( $ws == 0 )
+ && $j > 0
+ && $j < $jmax
+ && ( $last_type !~ /^[Zh]$/ ) )
+ {
- # /^[L\{\(\[]$/
- if ( $is_opening_type{$last_type} ) {
+ # If this happens, we have a non-fatal but undesirable
+ # hole in the above rules which should be patched.
+ write_diagnostics(
+ "WS flag is zero for tokens $last_token $token\n");
+ }
- $j_tight_closing_paren = -1;
+ $rwhitespace_flags->[$j] = $ws;
- # let us keep empty matched braces together: () {} []
- # except for BLOCKS
- if ( $token eq $matching_token{$last_token} ) {
- if ($block_type) {
- $ws = WS_YES;
- }
- else {
- $ws = WS_NO;
- }
- }
- else {
+ $DEBUG_WHITE && do {
+ my $str = substr( $last_token, 0, 15 );
+ $str .= ' ' x ( 16 - length($str) );
+ if ( !defined($ws_1) ) { $ws_1 = "*" }
+ if ( !defined($ws_2) ) { $ws_2 = "*" }
+ if ( !defined($ws_3) ) { $ws_3 = "*" }
+ if ( !defined($ws_4) ) { $ws_4 = "*" }
+ print STDOUT
+"NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
+ };
+ } ## end main loop
- # we're considering the right of an opening brace
- # tightness = 0 means always pad inside with space
- # tightness = 1 means pad inside if "complex"
- # tightness = 2 means never pad inside with space
+ if ( $rOpts->{'tight-secret-operators'} ) {
+ new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
+ }
+ return $rwhitespace_flags;
- my $tightness;
- if ( $last_type eq '{'
- && $last_token eq '{'
- && $last_block_type )
- {
- $tightness = $rOpts_block_brace_tightness;
- }
- else { $tightness = $tightness{$last_token} }
+} ## end sub set_whitespace_flags
- #=============================================================
- # Patch for test problem <<snippets/fabrice_bug.in>>
- # We must always avoid spaces around a bare word beginning
- # with ^ as in:
- # my $before = ${^PREMATCH};
- # Because all of the following cause an error in perl:
- # my $before = ${ ^PREMATCH };
- # my $before = ${ ^PREMATCH};
- # my $before = ${^PREMATCH };
- # So if brace tightness flag is -bt=0 we must temporarily reset
- # to bt=1. Note that here we must set tightness=1 and not 2 so
- # that the closing space
- # is also avoided (via the $j_tight_closing_paren flag in coding)
- if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
+sub dump_want_left_space {
+ my $fh = shift;
+ local $" = "\n";
+ $fh->print(<<EOM);
+These values are the main control of whitespace to the left of a token type;
+They may be altered with the -wls parameter.
+For a list of token types, use perltidy --dump-token-types (-dtt)
+ 1 means the token wants a space to its left
+-1 means the token does not want a space to its left
+------------------------------------------------------------------------
+EOM
+ foreach my $key ( sort keys %want_left_space ) {
+ $fh->print("$key\t$want_left_space{$key}\n");
+ }
+ return;
+}
- #=============================================================
+sub dump_want_right_space {
+ my $fh = shift;
+ local $" = "\n";
+ $fh->print(<<EOM);
+These values are the main control of whitespace to the right of a token type;
+They may be altered with the -wrs parameter.
+For a list of token types, use perltidy --dump-token-types (-dtt)
+ 1 means the token wants a space to its right
+-1 means the token does not want a space to its right
+------------------------------------------------------------------------
+EOM
+ foreach my $key ( sort keys %want_right_space ) {
+ $fh->print("$key\t$want_right_space{$key}\n");
+ }
+ return;
+}
- if ( $tightness <= 0 ) {
- $ws = WS_YES;
- }
- elsif ( $tightness > 1 ) {
- $ws = WS_NO;
- }
- else {
- $ws = $ws_in_container->($j);
- }
- }
+{ ## begin closure is_essential_whitespace
- # check for special cases which override the above rules
- $ws = $ws_opening_container_override->( $ws, $last_seqno );
+ my %is_sort_grep_map;
+ my %is_for_foreach;
+ my %is_digraph;
+ my %is_trigraph;
- } # end setting space flag inside opening tokens
- my $ws_1;
- $ws_1 = $ws
- if $DEBUG_WHITE;
+ BEGIN {
- #---------------------------------------------------------------
- # Whitespace Rules Section 2:
- # Handle space on inside of closing brace pairs.
- #---------------------------------------------------------------
+ my @q;
+ @q = qw(sort grep map);
+ @is_sort_grep_map{@q} = (1) x scalar(@q);
- # /[\}\)\]R]/
- if ( $is_closing_type{$type} ) {
+ @q = qw(for foreach);
+ @is_for_foreach{@q} = (1) x scalar(@q);
- if ( $j == $j_tight_closing_paren ) {
+ @q = qw(
+ .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
+ <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
+ );
+ @is_digraph{@q} = (1) x scalar(@q);
- $j_tight_closing_paren = -1;
- $ws = WS_NO;
- }
- else {
+ @q = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
+ @is_trigraph{@q} = (1) x scalar(@q);
+ }
- if ( !defined($ws) ) {
+ sub is_essential_whitespace {
- my $tightness;
- if ( $type eq '}' && $token eq '}' && $block_type ) {
- $tightness = $rOpts_block_brace_tightness;
- }
- else { $tightness = $tightness{$token} }
+ # Essential whitespace means whitespace which cannot be safely deleted
+ # without risking the introduction of a syntax error.
+ # We are given three tokens and their types:
+ # ($tokenl, $typel) is the token to the left of the space in question
+ # ($tokenr, $typer) is the token to the right of the space in question
+ # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
+ #
+ # This is a slow routine but is not needed too often except when -mangle
+ # is used.
+ #
+ # Note: This routine should almost never need to be changed. It is
+ # for avoiding syntax problems rather than for formatting.
- $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
- }
- }
+ my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
- # check for special cases which override the above rules
- $ws = $ws_closing_container_override->( $ws, $seqno );
+ my $tokenr_is_bareword = $tokenr =~ /^\w/ && $tokenr !~ /^\d/;
+ my $tokenr_is_open_paren = $tokenr eq '(';
+ my $token_joined = $tokenl . $tokenr;
+ my $tokenl_is_dash = $tokenl eq '-';
- } # end setting space flag inside closing tokens
+ my $result =
- my $ws_2;
- $ws_2 = $ws
- if $DEBUG_WHITE;
+ # never combine two bare words or numbers
+ # examples: and ::ok(1)
+ # return ::spw(...)
+ # for bla::bla:: abc
+ # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
+ # $input eq"quit" to make $inputeq"quit"
+ # my $size=-s::SINK if $file; <==OK but we won't do it
+ # don't join something like: for bla::bla:: abc
+ # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
+ ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
+ && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
- #---------------------------------------------------------------
- # Whitespace Rules Section 3:
- # Use the binary rule table.
- #---------------------------------------------------------------
- if ( !defined($ws) ) {
- $ws = $binary_ws_rules{$last_type}{$type};
- }
- my $ws_3;
- $ws_3 = $ws
- if $DEBUG_WHITE;
+ # do not combine a number with a concatenation dot
+ # example: pom.caputo:
+ # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
+ || $typel eq 'n' && $tokenr eq '.'
+ || $typer eq 'n'
+ && $tokenl eq '.'
- #---------------------------------------------------------------
- # Whitespace Rules Section 4:
- # Handle some special cases.
- #---------------------------------------------------------------
- if ( $token eq '(' ) {
+ # cases of a space before a bareword...
+ || (
+ $tokenr_is_bareword && (
- # This will have to be tweaked as tokenization changes.
- # We usually want a space at '} (', for example:
- # <<snippets/space1.in>>
- # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
- #
- # But not others:
- # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
- # At present, the above & block is marked as type L/R so this case
- # won't go through here.
- if ( $last_type eq '}' ) { $ws = WS_YES }
+ # do not join a minus with a bare word, because you might form
+ # a file test operator. Example from Complex.pm:
+ # if (CORE::abs($z - i) < $eps);
+ # "z-i" would be taken as a file test.
+ $tokenl_is_dash && length($tokenr) == 1
- # NOTE: some older versions of Perl had occasional problems if
- # spaces are introduced between keywords or functions and opening
- # parens. So the default is not to do this except is certain
- # cases. The current Perl seems to tolerate spaces.
+ # and something like this could become ambiguous without space
+ # after the '-':
+ # use constant III=>1;
+ # $a = $b - III;
+ # and even this:
+ # $a = - III;
+ || $tokenl_is_dash && $typer =~ /^[wC]$/
- # Space between keyword and '('
- elsif ( $last_type eq 'k' ) {
- $ws = WS_NO
- unless ( $rOpts_space_keyword_paren
- || $space_after_keyword{$last_token} );
+ # keep a space between a quote and a bareword to prevent the
+ # bareword from becoming a quote modifier.
+ || $typel eq 'Q'
- # Set inside space flag if requested
- $set_container_ws_by_keyword->( $last_token, $seqno );
- }
+ # keep a space between a token ending in '$' and any word;
+ # this caused trouble: "die @$ if $@"
+ || $typel eq 'i' && $tokenl =~ /\$$/
- # Space between function and '('
- # -----------------------------------------------------
- # 'w' and 'i' checks for something like:
- # myfun( &myfun( ->myfun(
- # -----------------------------------------------------
- elsif (( $last_type =~ /^[wUG]$/ )
- || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
- {
- $ws = WS_NO unless ($rOpts_space_function_paren);
- $set_container_ws_by_keyword->( $last_token, $seqno );
- }
+ # do not remove space between an '&' and a bare word because
+ # it may turn into a function evaluation, like here
+ # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
+ # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
+ || $typel eq '&'
- # space between something like $i and ( in <<snippets/space2.in>>
- # for $i ( 0 .. 20 ) {
- # FIXME: eventually, type 'i' needs to be split into multiple
- # token types so this can be a hardwired rule.
- elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
- $ws = WS_YES;
- }
+ # don't combine $$ or $# with any alphanumeric
+ # (testfile mangle.t with --mangle)
+ || $tokenl =~ /^\$[\$\#]$/
- # allow constant function followed by '()' to retain no space
- elsif ($last_type eq 'C'
- && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
- {
- $ws = WS_NO;
- }
- }
+ )
+ ) ## end $tokenr_is_bareword
- # patch for SWITCH/CASE: make space at ']{' optional
- # since the '{' might begin a case or when block
- elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
- $ws = WS_OPTIONAL;
- }
+ # OLD, not used
+ # '= -' should not become =- or you will get a warning
+ # about reversed -=
+ # || ($tokenr eq '-')
- # keep space between 'sub' and '{' for anonymous sub definition
- if ( $type eq '{' ) {
- if ( $last_token eq 'sub' ) {
- $ws = WS_YES;
- }
+ # do not join a bare word with a minus, like between 'Send' and
+ # '-recipients' here <<snippets/space3.in>>
+ # my $msg = new Fax::Send
+ # -recipients => $to,
+ # -data => $data;
+ # This is the safest thing to do. If we had the token to the right of
+ # the minus we could do a better check.
+ || $tokenr eq '-' && $typel eq 'w'
- # this is needed to avoid no space in '){'
- if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
+ # perl is very fussy about spaces before <<
+ || $tokenr =~ /^\<\</
- # avoid any space before the brace or bracket in something like
- # @opts{'a','b',...}
- if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
- $ws = WS_NO;
- }
- }
+ # avoid combining tokens to create new meanings. Example:
+ # $a+ +$b must not become $a++$b
+ || ( $is_digraph{$token_joined} )
+ || $is_trigraph{$token_joined}
- elsif ( $type eq 'i' ) {
+ # another example: do not combine these two &'s:
+ # allow_options & &OPT_EXECCGI
+ || $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) }
- # never a space before ->
- if ( $token =~ /^\-\>/ ) {
- $ws = WS_NO;
- }
- }
+ # retain any space after possible filehandle
+ # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
+ || $typel eq 'Z'
- # retain any space between '-' and bare word
- elsif ( $type eq 'w' || $type eq 'C' ) {
- $ws = WS_OPTIONAL if $last_type eq '-';
+ # Perl is sensitive to whitespace after the + here:
+ # $b = xvals $a + 0.1 * yvals $a;
+ || $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/
- # never a space before ->
- if ( $token =~ /^\-\>/ ) {
- $ws = WS_NO;
- }
- }
+ || (
+ $tokenr_is_open_paren && (
- # retain any space between '-' and bare word; for example
- # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
- # $myhash{USER-NAME}='steve';
- elsif ( $type eq 'm' || $type eq '-' ) {
- $ws = WS_OPTIONAL if ( $last_type eq 'w' );
- }
+ # keep paren separate in 'use Foo::Bar ()'
+ ( $typel eq 'w' && $typell eq 'k' && $tokenll eq 'use' )
- # always space before side comment
- elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
+ # keep any space between filehandle and paren:
+ # file mangle.t with --mangle:
+ || $typel eq 'Y'
- # always preserver whatever space was used after a possible
- # filehandle (except _) or here doc operator
- if (
- $type ne '#'
- && ( ( $last_type eq 'Z' && $last_token ne '_' )
- || $last_type eq 'h' )
- )
- {
- $ws = WS_OPTIONAL;
- }
+ # must have space between grep and left paren; "grep(" will fail
+ || $is_sort_grep_map{$tokenl}
- # space_backslash_quote; RT #123774 <<snippets/rt123774.in>>
- # allow a space between a backslash and single or double quote
- # to avoid fooling html formatters
- elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ ) {
- if ($rOpts_space_backslash_quote) {
- if ( $rOpts_space_backslash_quote == 1 ) {
- $ws = WS_OPTIONAL;
- }
- elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
- else { } # shouldnt happen
- }
- else {
- $ws = WS_NO;
- }
- }
- elsif ( $type eq 'k' ) {
+ # don't stick numbers next to left parens, as in:
+ #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
+ || $typel eq 'n'
+ )
+ ) ## end $tokenr_is_open_paren
- # Keywords 'for', 'foreach' are special cases for -kpit since the
- # opening paren does not always immediately follow the keyword. So
- # we have to search forward for the paren in this case. I have
- # limited the search to 10 tokens ahead, just in case somebody
- # has a big file and no opening paren. This should be enough for
- # all normal code.
- if ( $is_for_foreach{$token}
- && %keyword_paren_inner_tightness
- && defined( $keyword_paren_inner_tightness{$token} )
- && $j < $jmax )
- {
- my $jp = $j;
- for ( my $inc = 1 ; $inc < 10 ; $inc++ ) {
- $jp++;
- last if ( $jp > $jmax );
- next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
- my $seqno = $rLL->[$jp]->[_TYPE_SEQUENCE_];
- $set_container_ws_by_keyword->( $token, $seqno );
- last;
- }
- }
- }
+ # retain any space after here doc operator ( hereerr.t)
+ || $typel eq 'h'
- my $ws_4;
- $ws_4 = $ws
- if $DEBUG_WHITE;
+ # be careful with a space around ++ and --, to avoid ambiguity as to
+ # which token it applies
+ || $typer =~ /^(pp|mm)$/ && $tokenl !~ /^[\;\{\(\[]/
+ || $typel =~ /^(\+\+|\-\-)$/
+ && $tokenr !~ /^[\;\}\)\]]/
- #---------------------------------------------------------------
- # Whitespace Rules Section 5:
- # Apply default rules not covered above.
- #---------------------------------------------------------------
+ # need space after foreach my; for example, this will fail in
+ # older versions of Perl:
+ # foreach my$ft(@filetypes)...
+ || (
+ $tokenl eq 'my'
- # If we fall through to here, look at the pre-defined hash tables for
- # the two tokens, and:
- # if (they are equal) use the common value
- # if (either is zero or undef) use the other
- # if (either is -1) use it
- # That is,
- # left vs right
- # 1 vs 1 --> 1
- # 0 vs 0 --> 0
- # -1 vs -1 --> -1
- #
- # 0 vs -1 --> -1
- # 0 vs 1 --> 1
- # 1 vs 0 --> 1
- # -1 vs 0 --> -1
- #
- # -1 vs 1 --> -1
- # 1 vs -1 --> -1
- if ( !defined($ws) ) {
- my $wl = $want_left_space{$type};
- my $wr = $want_right_space{$last_type};
- if ( !defined($wl) ) { $wl = 0 }
- if ( !defined($wr) ) { $wr = 0 }
- $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
- }
+ # /^(for|foreach)$/
+ && $is_for_foreach{$tokenll}
+ && $tokenr =~ /^\$/
+ )
- if ( !defined($ws) ) {
- $ws = 0;
- write_diagnostics(
- "WS flag is undefined for tokens $last_token $token\n");
- }
+ # We must be sure that a space between a ? and a quoted string
+ # remains if the space before the ? remains. [Loca.pm, lockarea]
+ # ie,
+ # $b=join $comma ? ',' : ':', @_; # ok
+ # $b=join $comma?',' : ':', @_; # ok!
+ # $b=join $comma ?',' : ':', @_; # error!
+ # Not really required:
+ ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
- # Treat newline as a whitespace. Otherwise, we might combine
- # 'Send' and '-recipients' here according to the above rules:
- # <<snippets/space3.in>>
- # my $msg = new Fax::Send
- # -recipients => $to,
- # -data => $data;
- if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 }
+ # space stacked labels (TODO: check if really necessary)
+ || $typel eq 'J' && $typer eq 'J'
- if ( ( $ws == 0 )
- && $j > 0
- && $j < $jmax
- && ( $last_type !~ /^[Zh]$/ ) )
- {
+ ; # the value of this long logic sequence is the result we want
+ return $result;
+ }
+} ## end closure is_essential_whitespace
- # If this happens, we have a non-fatal but undesirable
- # hole in the above rules which should be patched.
- write_diagnostics(
- "WS flag is zero for tokens $last_token $token\n");
- }
+{ ## begin closure new_secret_operator_whitespace
- $rwhitespace_flags->[$j] = $ws;
+ my %secret_operators;
+ my %is_leading_secret_token;
- $DEBUG_WHITE && do {
- my $str = substr( $last_token, 0, 15 );
- $str .= ' ' x ( 16 - length($str) );
- if ( !defined($ws_1) ) { $ws_1 = "*" }
- if ( !defined($ws_2) ) { $ws_2 = "*" }
- if ( !defined($ws_3) ) { $ws_3 = "*" }
- if ( !defined($ws_4) ) { $ws_4 = "*" }
- print STDOUT
-"NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
- };
- } ## end main loop
+ BEGIN {
- if ( $rOpts->{'tight-secret-operators'} ) {
- new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
- }
- return $rwhitespace_flags;
+ # token lists for perl secret operators as compiled by Philippe Bruhat
+ # at: https://metacpan.org/module/perlsecret
+ %secret_operators = (
+ 'Goatse' => [qw#= ( ) =#], #=( )=
+ 'Venus1' => [qw#0 +#], # 0+
+ 'Venus2' => [qw#+ 0#], # +0
+ 'Enterprise' => [qw#) x ! !#], # ()x!!
+ 'Kite1' => [qw#~ ~ <>#], # ~~<>
+ 'Kite2' => [qw#~~ <>#], # ~~<>
+ 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
+ 'Bang bang ' => [qw#! !#], # !!
+ );
-} ## end sub set_whitespace_flags
+ # The following operators and constants are not included because they
+ # are normally kept tight by perltidy:
+ # ~~ <~>
+ #
-sub respace_tokens {
+ # Make a lookup table indexed by the first token of each operator:
+ # first token => [list, list, ...]
+ foreach my $value ( values(%secret_operators) ) {
+ my $tok = $value->[0];
+ push @{ $is_leading_secret_token{$tok} }, $value;
+ }
+ }
- my $self = shift;
- return if $rOpts->{'indent-only'};
+ sub new_secret_operator_whitespace {
- # This routine is called once per file to do as much formatting as possible
- # before new line breaks are set.
+ my ( $rlong_array, $rwhitespace_flags ) = @_;
- # This routine makes all necessary and possible changes to the tokenization
- # after the initial tokenization of the file. This is a tedious routine,
- # but basically it consists of inserting and deleting whitespace between
- # nonblank tokens according to the selected parameters. In a few cases
- # non-space characters are added, deleted or modified.
+ # Loop over all tokens in this line
+ my ( $token, $type );
+ my $jmax = @{$rlong_array} - 1;
+ foreach my $j ( 0 .. $jmax ) {
- # The goal of this routine is to create a new token array which only needs
- # the definition of new line breaks and padding to complete formatting. In
- # a few cases we have to cheat a little to achieve this goal. In
- # particular, we may not know if a semicolon will be needed, because it
- # depends on how the line breaks go. To handle this, we include the
- # semicolon as a 'phantom' which can be displayed as normal or as an empty
- # string.
+ $token = $rlong_array->[$j]->[_TOKEN_];
+ $type = $rlong_array->[$j]->[_TYPE_];
- # Method: The old tokens are copied one-by-one, with changes, from the old
- # linear storage array $rLL to a new array $rLL_new.
+ # Skip unless this token might start a secret operator
+ next if ( $type eq 'b' );
+ next unless ( $is_leading_secret_token{$token} );
- my $rLL = $self->[_rLL_];
- my $Klimit_old = $self->[_Klimit_];
- my $rlines = $self->[_rlines_];
- my $rpaired_to_inner_container = $self->[_rpaired_to_inner_container_];
- my $length_function = $self->[_length_function_];
+ # Loop over all secret operators with this leading token
+ foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
+ my $jend = $j - 1;
+ foreach my $tok ( @{$rpattern} ) {
+ $jend++;
+ $jend++
- my $rLL_new = []; # This is the new array
- my $KK = 0;
- my $rtoken_vars;
- my $Kmax = @{$rLL} - 1;
+ if ( $jend <= $jmax
+ && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
+ if ( $jend > $jmax
+ || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
+ {
+ $jend = undef;
+ last;
+ }
+ }
- my $CODE_type = "";
- my $line_type = "";
+ if ($jend) {
- my $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
- my $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
- my $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
- my $rOpts_ignore_side_comment_lengths =
- $rOpts->{'ignore-side-comment-lengths'};
+ # set flags to prevent spaces within this operator
+ foreach my $jj ( $j + 1 .. $jend ) {
+ $rwhitespace_flags->[$jj] = WS_NO;
+ }
+ $j = $jend;
+ last;
+ }
+ } ## End Loop over all operators
+ } ## End loop over all tokens
+ return;
+ } # End sub
+} ## end closure new_secret_operator_whitespace
- # Set the whitespace flags, which indicate the token spacing preference.
- my $rwhitespace_flags = $self->set_whitespace_flags();
+{ ## begin closure set_bond_strengths
- # we will be setting token lengths as we go
- my $cumulative_length = 0;
+ # These routines and variables are involved in deciding where to break very
+ # long lines.
- # We also define these hash indexes giving container token array indexes
- # as a function of the container sequence numbers. For example,
- my $K_opening_container = {}; # opening [ { or (
- my $K_closing_container = {}; # closing ] } or )
- my $K_opening_ternary = {}; # opening ? of ternary
- my $K_closing_ternary = {}; # closing : of ternary
+ my %is_good_keyword_breakpoint;
+ my %is_lt_gt_le_ge;
- # List of new K indexes of phantom semicolons
- # This will be needed if we want to undo them for iterations
- my $rK_phantom_semicolons = [];
+ my %binary_bond_strength;
+ my %nobreak_lhs;
+ my %nobreak_rhs;
- my %seqno_stack;
- my %KK_stack;
- my $depth_next = 0;
- my $rtype_count_by_seqno = {};
- my $ris_broken_container = {};
- my $rhas_broken_container = {};
- my $rparent_of_seqno = {};
- my $rchildren_of_seqno = {};
+ my @bias_tokens;
+ my $delta_bias;
- # a sub to link preceding nodes forward to a new node type
- my $link_back = sub {
- my ( $Ktop, $key ) = @_;
+ sub bias_table_key {
+ my ( $type, $token ) = @_;
+ my $bias_table_key = $type;
+ if ( $type eq 'k' ) {
+ $bias_table_key = $token;
+ if ( $token eq 'err' ) { $bias_table_key = 'or' }
+ }
+ return $bias_table_key;
+ }
- my $Kprev = $Ktop - 1;
- while ( $Kprev >= 0
- && !defined( $rLL_new->[$Kprev]->[$key] ) )
- {
- $rLL_new->[$Kprev]->[$key] = $Ktop;
- $Kprev -= 1;
- }
- };
-
- # A sub to store one token in the new array
- # All new tokens must be stored by this sub so that it can update
- # all data structures on the fly.
- my $last_nonblank_type = ';';
- my $last_nonblank_token = ';';
- my $last_nonblank_block_type = '';
- my $nonblank_token_count = 0;
- my $store_token = sub {
- my ($item) = @_;
+ sub initialize_bond_strength_hashes {
- # This will be the index of this item in the new array
- my $KK_new = @{$rLL_new};
+ my @q;
+ @q = qw(if unless while until for foreach);
+ @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
- # check for a sequenced item (i.e., container or ?/:)
- my $type_sequence = $item->[_TYPE_SEQUENCE_];
- if ($type_sequence) {
+ @q = qw(lt gt le ge);
+ @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
+ #
+ # The decision about where to break a line depends upon a "bond
+ # strength" between tokens. The LOWER the bond strength, the MORE
+ # likely a break. A bond strength may be any value but to simplify
+ # things there are several pre-defined strength levels:
- $link_back->( $KK_new, _KNEXT_SEQ_ITEM_ );
+ # NO_BREAK => 10000;
+ # VERY_STRONG => 100;
+ # STRONG => 2.1;
+ # NOMINAL => 1.1;
+ # WEAK => 0.8;
+ # VERY_WEAK => 0.55;
- my $token = $item->[_TOKEN_];
- if ( $is_opening_token{$token} ) {
+ # The strength values are based on trial-and-error, and need to be
+ # tweaked occasionally to get desired results. Some comments:
+ #
+ # 1. Only relative strengths are important. small differences
+ # in strengths can make big formatting differences.
+ # 2. Each indentation level adds one unit of bond strength.
+ # 3. A value of NO_BREAK makes an unbreakable bond
+ # 4. A value of VERY_WEAK is the strength of a ','
+ # 5. Values below NOMINAL are considered ok break points.
+ # 6. Values above NOMINAL are considered poor break points.
+ #
+ # The bond strengths should roughly follow precedence order where
+ # possible. If you make changes, please check the results very
+ # carefully on a variety of scripts. Testing with the -extrude
+ # options is particularly helpful in exercising all of the rules.
- $K_opening_container->{$type_sequence} = $KK_new;
- }
- elsif ( $is_closing_token{$token} ) {
+ # Wherever possible, bond strengths are defined in the following
+ # tables. There are two main stages to setting bond strengths and
+ # two types of tables:
+ #
+ # The first stage involves looking at each token individually and
+ # defining left and right bond strengths, according to if we want
+ # to break to the left or right side, and how good a break point it
+ # is. For example tokens like =, ||, && make good break points and
+ # will have low strengths, but one might want to break on either
+ # side to put them at the end of one line or beginning of the next.
+ #
+ # The second stage involves looking at certain pairs of tokens and
+ # defining a bond strength for that particular pair. This second
+ # stage has priority.
- $K_closing_container->{$type_sequence} = $KK_new;
- }
+ #---------------------------------------------------------------
+ # Bond Strength BEGIN Section 1.
+ # Set left and right bond strengths of individual tokens.
+ #---------------------------------------------------------------
- # These are not yet used but could be useful
- else {
- if ( $token eq '?' ) {
- $K_opening_ternary->{$type_sequence} = $KK_new;
- }
- elsif ( $token eq ':' ) {
- $K_closing_ternary->{$type_sequence} = $KK_new;
- }
- else {
- # shouldn't happen
- Fault("Ugh: shouldn't happen");
- }
- }
- }
+ # NOTE: NO_BREAK's set in this section first are HINTS which will
+ # probably not be honored. Essential NO_BREAKS's should be set in
+ # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
+ # of this subroutine.
- my $type = $item->[_TYPE_];
+ # Note that we are setting defaults in this section. The user
+ # cannot change bond strengths but can cause the left and right
+ # bond strengths of any token type to be swapped through the use of
+ # the -wba and -wbb flags. In this way the user can determine if a
+ # breakpoint token should appear at the end of one line or the
+ # beginning of the next line.
- # trim comments
- if ( $type eq '#' ) {
- $item->[_TOKEN_] =~ s/\s*$//;
- }
+ %right_bond_strength = ();
+ %left_bond_strength = ();
+ %binary_bond_strength = ();
+ %nobreak_lhs = ();
+ %nobreak_rhs = ();
- # Find the length of this token. Later it may be adjusted if phantom
- # or ignoring side comment lengths.
- my $token_length = $length_function->( $item->[_TOKEN_] );
+ # The hash keys in this section are token types, plus the text of
+ # certain keywords like 'or', 'and'.
- # Mark length of side comments as just 1 if their lengths are ignored
- if ( $type eq '#'
- && $rOpts_ignore_side_comment_lengths
- && ( !$CODE_type || $CODE_type eq 'HSC' ) )
- {
- $token_length = 1;
- }
+ # no break around possible filehandle
+ $left_bond_strength{'Z'} = NO_BREAK;
+ $right_bond_strength{'Z'} = NO_BREAK;
- $item->[_TOKEN_LENGTH_] = $token_length;
+ # never put a bare word on a new line:
+ # example print (STDERR, "bla"); will fail with break after (
+ $left_bond_strength{'w'} = NO_BREAK;
- # and update the cumulative length
- $cumulative_length += $token_length;
+ # blanks always have infinite strength to force breaks after
+ # real tokens
+ $right_bond_strength{'b'} = NO_BREAK;
- # Save the length sum to just AFTER this token
- $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
+ # try not to break on exponentation
+ @q = qw# ** .. ... <=> #;
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} = (STRONG) x scalar(@q);
- if ( $type && $type ne 'b' && $type ne '#' ) {
- $last_nonblank_type = $type;
- $last_nonblank_token = $item->[_TOKEN_];
- $last_nonblank_block_type = $item->[_BLOCK_TYPE_];
- $nonblank_token_count++;
+ # The comma-arrow has very low precedence but not a good break point
+ $left_bond_strength{'=>'} = NO_BREAK;
+ $right_bond_strength{'=>'} = NOMINAL;
- # count selected types
- if ( $type =~ /^(=>|,)$/ ) {
- my $seqno = $seqno_stack{ $depth_next - 1 };
- if ( defined($seqno) ) {
- $rtype_count_by_seqno->{$seqno}->{$type}++;
- }
- }
- }
+ # ok to break after label
+ $left_bond_strength{'J'} = NO_BREAK;
+ $right_bond_strength{'J'} = NOMINAL;
+ $left_bond_strength{'j'} = STRONG;
+ $right_bond_strength{'j'} = STRONG;
+ $left_bond_strength{'A'} = STRONG;
+ $right_bond_strength{'A'} = STRONG;
- # and finally, add this item to the new array
- push @{$rLL_new}, $item;
- };
+ $left_bond_strength{'->'} = STRONG;
+ $right_bond_strength{'->'} = VERY_STRONG;
- my $store_token_and_space = sub {
- my ( $item, $want_space ) = @_;
+ $left_bond_strength{'CORE::'} = NOMINAL;
+ $right_bond_strength{'CORE::'} = NO_BREAK;
- # store a token with preceding space if requested and needed
+ # breaking AFTER modulus operator is ok:
+ @q = qw< % >;
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} =
+ ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
- # First store the space
- if ( $want_space
- && @{$rLL_new}
- && $rLL_new->[-1]->[_TYPE_] ne 'b'
- && $rOpts_add_whitespace )
- {
- my $rcopy = copy_token_as_type( $item, 'b', ' ' );
- $rcopy->[_LINE_INDEX_] =
- $rLL_new->[-1]->[_LINE_INDEX_];
- $store_token->($rcopy);
- }
+ # Break AFTER math operators * and /
+ @q = qw< * / x >;
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
- # then the token
- $store_token->($item);
- };
+ # Break AFTER weakest math operators + and -
+ # Make them weaker than * but a bit stronger than '.'
+ @q = qw< + - >;
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} =
+ ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
- my $K_end_q = sub {
- my ($KK) = @_;
- my $K_end = $KK;
- my $Kn = $self->K_next_nonblank($KK);
- while ( defined($Kn) && $rLL->[$Kn]->[_TYPE_] eq 'q' ) {
- $K_end = $Kn;
- $Kn = $self->K_next_nonblank($Kn);
- }
- return $K_end;
- };
+ # breaking BEFORE these is just ok:
+ @q = qw# >> << #;
+ @right_bond_strength{@q} = (STRONG) x scalar(@q);
+ @left_bond_strength{@q} = (NOMINAL) x scalar(@q);
- my $add_phantom_semicolon = sub {
+ # breaking before the string concatenation operator seems best
+ # because it can be hard to see at the end of a line
+ $right_bond_strength{'.'} = STRONG;
+ $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
- my ($KK) = @_;
+ @q = qw< } ] ) R >;
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
- my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
- return unless ( defined($Kp) );
+ # make these a little weaker than nominal so that they get
+ # favored for end-of-line characters
+ @q = qw< != == =~ !~ ~~ !~~ >;
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} =
+ ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
- # we are only adding semicolons for certain block types
- my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
- return
- unless ( $ok_to_add_semicolon_for_block_type{$block_type}
- || $block_type =~ /^(sub|package)/
- || $block_type =~ /^\w+\:$/ );
+ # break AFTER these
+ @q = qw# < > | & >= <= #;
+ @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
+ @right_bond_strength{@q} =
+ ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
- my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ # breaking either before or after a quote is ok
+ # but bias for breaking before a quote
+ $left_bond_strength{'Q'} = NOMINAL;
+ $right_bond_strength{'Q'} = NOMINAL + 0.02;
+ $left_bond_strength{'q'} = NOMINAL;
+ $right_bond_strength{'q'} = NOMINAL;
- my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
- my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
+ # starting a line with a keyword is usually ok
+ $left_bond_strength{'k'} = NOMINAL;
- # Do not add a semicolon if...
- return
- if (
+ # we usually want to bond a keyword strongly to what immediately
+ # follows, rather than leaving it stranded at the end of a line
+ $right_bond_strength{'k'} = STRONG;
- # it would follow a comment (and be isolated)
- $previous_nonblank_type eq '#'
+ $left_bond_strength{'G'} = NOMINAL;
+ $right_bond_strength{'G'} = STRONG;
- # it follows a code block ( because they are not always wanted
- # there and may add clutter)
- || $rLL_new->[$Kp]->[_BLOCK_TYPE_]
+ # assignment operators
+ @q = qw(
+ = **= += *= &= <<= &&=
+ -= /= |= >>= ||= //=
+ .= %= ^=
+ x=
+ );
- # it would follow a label
- || $previous_nonblank_type eq 'J'
+ # Default is to break AFTER various assignment operators
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} =
+ ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
- # it would be inside a 'format' statement (and cause syntax error)
- || ( $previous_nonblank_type eq 'k'
- && $previous_nonblank_token =~ /format/ )
+ # Default is to break BEFORE '&&' and '||' and '//'
+ # set strength of '||' to same as '=' so that chains like
+ # $a = $b || $c || $d will break before the first '||'
+ $right_bond_strength{'||'} = NOMINAL;
+ $left_bond_strength{'||'} = $right_bond_strength{'='};
- # if it would prevent welding two containers
- || $rpaired_to_inner_container->{$type_sequence}
+ # same thing for '//'
+ $right_bond_strength{'//'} = NOMINAL;
+ $left_bond_strength{'//'} = $right_bond_strength{'='};
- );
+ # set strength of && a little higher than ||
+ $right_bond_strength{'&&'} = NOMINAL;
+ $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
- # We will insert an empty semicolon here as a placeholder. Later, if
- # it becomes the last token on a line, we will bring it to life. The
- # advantage of doing this is that (1) we just have to check line
- # endings, and (2) the phantom semicolon has zero width and therefore
- # won't cause needless breaks of one-line blocks.
- my $Ktop = -1;
- if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
- && $want_left_space{';'} == WS_NO )
- {
+ $left_bond_strength{';'} = VERY_STRONG;
+ $right_bond_strength{';'} = VERY_WEAK;
+ $left_bond_strength{'f'} = VERY_STRONG;
- # convert the blank into a semicolon..
- # be careful: we are working on the new stack top
- # on a token which has been stored.
- my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' );
+ # make right strength of for ';' a little less than '='
+ # to make for contents break after the ';' to avoid this:
+ # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
+ # $number_of_fields )
+ # and make it weaker than ',' and 'and' too
+ $right_bond_strength{'f'} = VERY_WEAK - 0.03;
- # Convert the existing blank to:
- # a phantom semicolon for one_line_block option = 0 or 1
- # a real semicolon for one_line_block option = 2
- my $tok = '';
- my $len_tok = 0;
- if ( $rOpts_one_line_block_semicolons == 2 ) {
- $tok = ';';
- $len_tok = 1;
- }
+ # The strengths of ?/: should be somewhere between
+ # an '=' and a quote (NOMINAL),
+ # make strength of ':' slightly less than '?' to help
+ # break long chains of ? : after the colons
+ $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
+ $right_bond_strength{':'} = NO_BREAK;
+ $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
+ $right_bond_strength{'?'} = NO_BREAK;
- $rLL_new->[$Ktop]->[_TOKEN_] = $tok;
- $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
- $rLL_new->[$Ktop]->[_TYPE_] = ';';
- $rLL_new->[$Ktop]->[_SLEVEL_] =
- $rLL->[$KK]->[_SLEVEL_];
+ $left_bond_strength{','} = VERY_STRONG;
+ $right_bond_strength{','} = VERY_WEAK;
- push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
+ # remaining digraphs and trigraphs not defined above
+ @q = qw( :: <> ++ --);
+ @left_bond_strength{@q} = (WEAK) x scalar(@q);
+ @right_bond_strength{@q} = (STRONG) x scalar(@q);
- # Then store a new blank
- $store_token->($rcopy);
- }
- else {
+ # Set bond strengths of certain keywords
+ # make 'or', 'err', 'and' slightly weaker than a ','
+ $left_bond_strength{'and'} = VERY_WEAK - 0.01;
+ $left_bond_strength{'or'} = VERY_WEAK - 0.02;
+ $left_bond_strength{'err'} = VERY_WEAK - 0.02;
+ $left_bond_strength{'xor'} = VERY_WEAK - 0.01;
+ $right_bond_strength{'and'} = NOMINAL;
+ $right_bond_strength{'or'} = NOMINAL;
+ $right_bond_strength{'err'} = NOMINAL;
+ $right_bond_strength{'xor'} = NOMINAL;
- # insert a new token
- my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
- $rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_];
- $store_token->($rcopy);
- push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
- }
- };
+ #---------------------------------------------------------------
+ # Bond Strength BEGIN Section 2.
+ # Set binary rules for bond strengths between certain token types.
+ #---------------------------------------------------------------
- my $check_Q = sub {
+ # We have a little problem making tables which apply to the
+ # container tokens. Here is a list of container tokens and
+ # their types:
+ #
+ # type tokens // meaning
+ # { {, [, ( // indent
+ # } }, ], ) // outdent
+ # [ [ // left non-structural [ (enclosing an array index)
+ # ] ] // right non-structural square bracket
+ # ( ( // left non-structural paren
+ # ) ) // right non-structural paren
+ # L { // left non-structural curly brace (enclosing a key)
+ # R } // right non-structural curly brace
+ #
+ # Some rules apply to token types and some to just the token
+ # itself. We solve the problem by combining type and token into a
+ # new hash key for the container types.
+ #
+ # If a rule applies to a token 'type' then we need to make rules
+ # for each of these 'type.token' combinations:
+ # Type Type.Token
+ # { {{, {[, {(
+ # [ [[
+ # ( ((
+ # L L{
+ # } }}, }], })
+ # ] ]]
+ # ) ))
+ # R R}
+ #
+ # If a rule applies to a token then we need to make rules for
+ # these 'type.token' combinations:
+ # Token Type.Token
+ # { {{, L{
+ # [ {[, [[
+ # ( {(, ((
+ # } }}, R}
+ # ] }], ]]
+ # ) }), ))
- # Check that a quote looks okay
- # This sub works but needs to by sync'd with the log file output
- # before it can be used.
- my ( $KK, $Kfirst, $line_number ) = @_;
- my $token = $rLL->[$KK]->[_TOKEN_];
- $self->note_embedded_tab($line_number) if ( $token =~ "\t" );
+ # allow long lines before final { in an if statement, as in:
+ # if (..........
+ # ..........)
+ # {
+ #
+ # Otherwise, the line before the { tends to be too short.
- my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
- return unless ( defined($Kp) );
- my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
- my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
+ $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
+ $binary_bond_strength{'(('}{'{{'} = NOMINAL;
- my $previous_nonblank_type_2 = 'b';
- my $previous_nonblank_token_2 = "";
- my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
- if ( defined($Kpp) ) {
- $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
- $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
- }
+ # break on something like '} (', but keep this stronger than a ','
+ # example is in 'howe.pl'
+ $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
+ $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
- my $Kn = $self->K_next_nonblank($KK);
- my $next_nonblank_token = "";
- if ( defined($Kn) ) {
- $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
- }
+ # keep matrix and hash indices together
+ # but make them a little below STRONG to allow breaking open
+ # something like {'some-word'}{'some-very-long-word'} at the }{
+ # (bracebrk.t)
+ $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
+ $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
+ $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
+ $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
- my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
- my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
+ # increase strength to the point where a break in the following
+ # will be after the opening paren rather than at the arrow:
+ # $a->$b($c);
+ $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
- # make note of something like '$var = s/xxx/yyy/;'
- # in case it should have been '$var =~ s/xxx/yyy/;'
- if (
- $token =~ /^(s|tr|y|m|\/)/
- && $previous_nonblank_token =~ /^(=|==|!=)$/
+ # Note that the following alternative strength would make the break at the
+ # '->' rather than opening the '('. Both have advantages and disadvantages.
+ # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
- # preceded by simple scalar
- && $previous_nonblank_type_2 eq 'i'
- && $previous_nonblank_token_2 =~ /^\$/
+ $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- # followed by some kind of termination
- # (but give complaint if we can not see far enough ahead)
- && $next_nonblank_token =~ /^[; \)\}]$/
+ $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
+ $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
+ $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
+ $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
- # scalar is not declared
- && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
- )
- {
- my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
- complain(
-"Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
- );
- }
- };
+ #---------------------------------------------------------------
+ # Binary NO_BREAK rules
+ #---------------------------------------------------------------
- # Main loop over all lines of the file
- my $last_K_out;
+ # use strict requires that bare word and => not be separated
+ $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
+ $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
- # Testing option to break qw. Do not use; it can make a mess.
- my $ALLOW_BREAK_MULTILINE_QW = 0;
- my $in_multiline_qw;
- foreach my $line_of_tokens ( @{$rlines} ) {
+ # Never break between a bareword and a following paren because
+ # perl may give an error. For example, if a break is placed
+ # between 'to_filehandle' and its '(' the following line will
+ # give a syntax error [Carp.pm]: my( $no) =fileno(
+ # to_filehandle( $in)) ;
+ $binary_bond_strength{'C'}{'(('} = NO_BREAK;
+ $binary_bond_strength{'C'}{'{('} = NO_BREAK;
+ $binary_bond_strength{'U'}{'(('} = NO_BREAK;
+ $binary_bond_strength{'U'}{'{('} = NO_BREAK;
- my $input_line_number = $line_of_tokens->{_line_number};
- my $last_line_type = $line_type;
- $line_type = $line_of_tokens->{_line_type};
- next unless ( $line_type eq 'CODE' );
- my $last_CODE_type = $CODE_type;
- $CODE_type = $line_of_tokens->{_code_type};
- my $rK_range = $line_of_tokens->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
- next unless defined($Kfirst);
+ # use strict requires that bare word within braces not start new
+ # line
+ $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
- # Check for correct sequence of token indexes...
- # An error here means that sub write_line() did not correctly
- # package the tokenized lines as it received them.
- if ( defined($last_K_out) ) {
- if ( $Kfirst != $last_K_out + 1 ) {
- Fault(
- "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
- );
- }
- }
- else {
- if ( $Kfirst != 0 ) {
- Fault("Program Bug: first K is $Kfirst but should be 0");
- }
- }
- $last_K_out = $Klast;
+ $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
- # Handle special lines of code
- if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
+ # use strict requires that bare word and => not be separated
+ $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
- # CODE_types are as follows.
- # 'BL' = Blank Line
- # 'VB' = Verbatim - line goes out verbatim
- # 'FS' = Format Skipping - line goes out verbatim, no blanks
- # 'IO' = Indent Only - only indentation may be changed
- # 'NIN' = No Internal Newlines - line does not get broken
- # 'HSC'=Hanging Side Comment - fix this hanging side comment
- # 'BC'=Block Comment - an ordinary full line comment
- # 'SBC'=Static Block Comment - a block comment which does not get
- # indented
- # 'SBCX'=Static Block Comment Without Leading Space
- # 'VER'=VERSION statement
- # '' or (undefined) - no restructions
+ # use strict does not allow separating type info from trailing { }
+ # testfile is readmail.pl
+ $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
+ $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
- # For a hanging side comment we insert an empty quote before
- # the comment so that it becomes a normal side comment and
- # will be aligned by the vertical aligner
- if ( $CODE_type eq 'HSC' ) {
+ # As a defensive measure, do not break between a '(' and a
+ # filehandle. In some cases, this can cause an error. For
+ # example, the following program works:
+ # my $msg="hi!\n";
+ # print
+ # ( STDOUT
+ # $msg
+ # );
+ #
+ # But this program fails:
+ # my $msg="hi!\n";
+ # print
+ # (
+ # STDOUT
+ # $msg
+ # );
+ #
+ # This is normally only a problem with the 'extrude' option
+ $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
+ $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
- # Safety Check: This must be a line with one token (a comment)
- my $rtoken_vars = $rLL->[$Kfirst];
- if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) {
+ # never break between sub name and opening paren
+ $binary_bond_strength{'w'}{'(('} = NO_BREAK;
+ $binary_bond_strength{'w'}{'{('} = NO_BREAK;
- # Note that even if the flag 'noadd-whitespace' is set, we
- # will make an exception here and allow a blank to be
- # inserted to push the comment to the right. We can think
- # of this as an adjustment of indentation rather than
- # whitespace between tokens. This will also prevent the
- # hanging side comment from getting converted to a block
- # comment if whitespace gets deleted, as for example with
- # the -extrude and -mangle options.
- my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' );
- $store_token->($rcopy);
- $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
- $store_token->($rcopy);
- $store_token->($rtoken_vars);
- next;
- }
- else {
+ # keep '}' together with ';'
+ $binary_bond_strength{'}}'}{';'} = NO_BREAK;
- # This line was mis-marked by sub scan_comment
- Fault(
- "Program bug. A hanging side comment has been mismarked"
- );
- }
- }
+ # Breaking before a ++ can cause perl to guess wrong. For
+ # example the following line will cause a syntax error
+ # with -extrude if we break between '$i' and '++' [fixstyle2]
+ # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
+ $nobreak_lhs{'++'} = NO_BREAK;
- # Copy tokens unchanged
- foreach my $KK ( $Kfirst .. $Klast ) {
- $store_token->( $rLL->[$KK] );
- }
- next;
- }
+ # Do not break before a possible file handle
+ $nobreak_lhs{'Z'} = NO_BREAK;
- # Handle normal line..
+ # use strict hates bare words on any new line. For
+ # example, a break before the underscore here provokes the
+ # wrath of use strict:
+ # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
+ $nobreak_rhs{'F'} = NO_BREAK;
+ $nobreak_rhs{'CORE::'} = NO_BREAK;
- # Insert any essential whitespace between lines
- # if last line was normal CODE.
- # Patch for rt #125012: use K_previous_code rather than '_nonblank'
- # because comments may disappear.
- my $type_next = $rLL->[$Kfirst]->[_TYPE_];
- my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
- my $Kp = $self->K_previous_code( undef, $rLL_new );
- if ( $last_line_type eq 'CODE'
- && $type_next ne 'b'
- && defined($Kp) )
- {
- my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
- my $type_p = $rLL_new->[$Kp]->[_TYPE_];
+ #---------------------------------------------------------------
+ # Bond Strength BEGIN Section 3.
+ # Define tables and values for applying a small bias to the above
+ # values.
+ #---------------------------------------------------------------
+ # Adding a small 'bias' to strengths is a simple way to make a line
+ # break at the first of a sequence of identical terms. For
+ # example, to force long string of conditional operators to break
+ # with each line ending in a ':', we can add a small number to the
+ # bond strength of each ':' (colon.t)
+ @bias_tokens = qw( : && || f and or . ); # tokens which get bias
+ $delta_bias = 0.0001; # a very small strength level
+ return;
- my ( $token_pp, $type_pp );
- my $Kpp = $self->K_previous_code( $Kp, $rLL_new );
- if ( defined($Kpp) ) {
- $token_pp = $rLL_new->[$Kpp]->[_TOKEN_];
- $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
- }
- else {
- $token_pp = ";";
- $type_pp = ';';
- }
+ } ## end sub initialize_bond_strength_hashes
- if (
- is_essential_whitespace(
- $token_pp, $type_pp, $token_p,
- $type_p, $token_next, $type_next,
- )
- )
- {
+ my $DEBUG_BOND;
- # Copy this first token as blank, but use previous line number
- my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' );
- $rcopy->[_LINE_INDEX_] =
- $rLL_new->[-1]->[_LINE_INDEX_];
- $store_token->($rcopy);
- }
- }
+ sub set_bond_strengths {
- # loop to copy all tokens on this line, with any changes
- my $type_sequence;
- for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
- $rtoken_vars = $rLL->[$KK];
- my $token = $rtoken_vars->[_TOKEN_];
- my $type = $rtoken_vars->[_TYPE_];
- my $last_type_sequence = $type_sequence;
- $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ my ($self) = @_;
- # Handle a blank space ...
- if ( $type eq 'b' ) {
+ # patch-its always ok to break at end of line
+ $nobreak_to_go[$max_index_to_go] = 0;
- # Delete it if not wanted by whitespace rules
- # or we are deleting all whitespace
- # Note that whitespace flag is a flag indicating whether a
- # white space BEFORE the token is needed
- next if ( $KK >= $Klast ); # skip terminal blank
- my $Knext = $KK + 1;
- my $ws = $rwhitespace_flags->[$Knext];
- if ( $ws == -1
- || $rOpts_delete_old_whitespace )
- {
+ my $rOpts_short_concatenation_item_length =
+ $rOpts->{'short-concatenation-item-length'};
- # FIXME: maybe switch to using _new
- my $Kp = $self->K_previous_nonblank($KK);
- next unless defined($Kp);
- my $token_p = $rLL->[$Kp]->[_TOKEN_];
- my $type_p = $rLL->[$Kp]->[_TYPE_];
+ # we start a new set of bias values for each line
+ my %bias;
+ @bias{@bias_tokens} = (0) x scalar(@bias_tokens);
+ my $code_bias = -.01; # bias for closing block braces
- my ( $token_pp, $type_pp );
+ my $type = 'b';
+ my $token = ' ';
+ my $token_length = 1;
+ my $last_type;
+ my $last_nonblank_type = $type;
+ my $last_nonblank_token = $token;
+ my $list_str = $left_bond_strength{'?'};
- my $Kpp = $self->K_previous_nonblank($Kp);
- if ( defined($Kpp) ) {
- $token_pp = $rLL->[$Kpp]->[_TOKEN_];
- $type_pp = $rLL->[$Kpp]->[_TYPE_];
- }
- else {
- $token_pp = ";";
- $type_pp = ';';
- }
- my $token_next = $rLL->[$Knext]->[_TOKEN_];
- my $type_next = $rLL->[$Knext]->[_TYPE_];
+ my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
+ $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
+ );
- my $do_not_delete = is_essential_whitespace(
- $token_pp, $type_pp, $token_p,
- $type_p, $token_next, $type_next,
- );
+ # main loop to compute bond strengths between each pair of tokens
+ foreach my $i ( 0 .. $max_index_to_go ) {
+ $last_type = $type;
+ if ( $type ne 'b' ) {
+ $last_nonblank_type = $type;
+ $last_nonblank_token = $token;
+ }
+ $type = $types_to_go[$i];
- next unless ($do_not_delete);
- }
-
- # make it just one character if allowed
- if ($rOpts_add_whitespace) {
- $rtoken_vars->[_TOKEN_] = ' ';
- }
- $store_token->($rtoken_vars);
+ # strength on both sides of a blank is the same
+ if ( $type eq 'b' && $last_type ne 'b' ) {
+ $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
next;
}
- # Handle a nonblank token...
+ $token = $tokens_to_go[$i];
+ $token_length = $token_lengths_to_go[$i];
+ $block_type = $block_type_to_go[$i];
+ $i_next = $i + 1;
+ $next_type = $types_to_go[$i_next];
+ $next_token = $tokens_to_go[$i_next];
+ $total_nesting_depth = $nesting_depth_to_go[$i_next];
+ $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
+ $next_nonblank_type = $types_to_go[$i_next_nonblank];
+ $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
- # check for a qw quote
- if ( $type eq 'q' ) {
+ # We are computing the strength of the bond between the current
+ # token and the NEXT token.
- # trim blanks from right of qw quotes
- # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
- # this)
- $token =~ s/\s*$//;
- $rtoken_vars->[_TOKEN_] = $token;
- $self->note_embedded_tab($input_line_number)
- if ( $token =~ "\t" );
+ #---------------------------------------------------------------
+ # Bond Strength Section 1:
+ # First Approximation.
+ # Use minimum of individual left and right tabulated bond
+ # strengths.
+ #---------------------------------------------------------------
+ my $bsr = $right_bond_strength{$type};
+ my $bsl = $left_bond_strength{$next_nonblank_type};
- if ($in_multiline_qw) {
+ # define right bond strengths of certain keywords
+ if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
+ $bsr = $right_bond_strength{$token};
+ }
+ elsif ( $token eq 'ne' or $token eq 'eq' ) {
+ $bsr = NOMINAL;
+ }
- # If we are at the end of a multiline qw ..
- if ( $in_multiline_qw == $KK ) {
+ # set terminal bond strength to the nominal value
+ # this will cause good preceding breaks to be retained
+ if ( $i_next_nonblank > $max_index_to_go ) {
+ $bsl = NOMINAL;
+ }
- # Split off the closing delimiter character
- # so that the formatter can put a line break there if necessary
- my $part1 = $token;
- my $part2 = substr( $part1, -1, 1, "" );
+ # define right bond strengths of certain keywords
+ if ( $next_nonblank_type eq 'k'
+ && defined( $left_bond_strength{$next_nonblank_token} ) )
+ {
+ $bsl = $left_bond_strength{$next_nonblank_token};
+ }
+ elsif ($next_nonblank_token eq 'ne'
+ or $next_nonblank_token eq 'eq' )
+ {
+ $bsl = NOMINAL;
+ }
+ elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
+ $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
+ }
- if ($part1) {
- my $rcopy =
- copy_token_as_type( $rtoken_vars, 'q', $part1 );
- $store_token->($rcopy);
- $token = $part2;
- $rtoken_vars->[_TOKEN_] = $token;
+ # Use the minimum of the left and right strengths. Note: it might
+ # seem that we would want to keep a NO_BREAK if either token has
+ # this value. This didn't work, for example because in an arrow
+ # list, it prevents the comma from separating from the following
+ # bare word (which is probably quoted by its arrow). So necessary
+ # NO_BREAK's have to be handled as special cases in the final
+ # section.
+ if ( !defined($bsr) ) { $bsr = VERY_STRONG }
+ if ( !defined($bsl) ) { $bsl = VERY_STRONG }
+ my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
+ my $bond_str_1 = $bond_str;
- }
- $in_multiline_qw = undef;
+ #---------------------------------------------------------------
+ # Bond Strength Section 2:
+ # Apply hardwired rules..
+ #---------------------------------------------------------------
- # store without preceding blank
- $store_token->($rtoken_vars);
- next;
+ # Patch to put terminal or clauses on a new line: Weaken the bond
+ # at an || followed by die or similar keyword to make the terminal
+ # or clause fall on a new line, like this:
+ #
+ # my $class = shift
+ # || die "Cannot add broadcast: No class identifier found";
+ #
+ # Otherwise the break will be at the previous '=' since the || and
+ # = have the same starting strength and the or is biased, like
+ # this:
+ #
+ # my $class =
+ # shift || die "Cannot add broadcast: No class identifier found";
+ #
+ # In any case if the user places a break at either the = or the ||
+ # it should remain there.
+ if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
+ if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
+ if ( $want_break_before{$token} && $i > 0 ) {
+ $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
+
+ # keep bond strength of a token and its following blank
+ # the same
+ if ( $types_to_go[ $i - 1 ] eq 'b' && $i > 2 ) {
+ $bond_strength_to_go[ $i - 2 ] -= $delta_bias;
+ }
}
else {
- # continuing a multiline qw
- $store_token->($rtoken_vars);
- next;
+ $bond_str -= $delta_bias;
}
}
+ }
- else {
-
- # we are encountered new qw token...see if multiline
- my $K_end = $K_end_q->($KK);
- if ( $ALLOW_BREAK_MULTILINE_QW && $K_end != $KK ) {
-
- # Starting multiline qw...
- # set flag equal to the ending K
- $in_multiline_qw = $K_end;
+ # good to break after end of code blocks
+ if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
- # Split off the leading part
- # so that the formatter can put a line break there if necessary
- if ( $token =~ /^(qw\s*.)(.*)$/ ) {
- my $part1 = $1;
- my $part2 = $2;
- if ($part2) {
- my $rcopy =
- copy_token_as_type( $rtoken_vars, 'q',
- $part1 );
- $store_token_and_space->(
- $rcopy, $rwhitespace_flags->[$KK] == WS_YES
- );
- $token = $part2;
- $rtoken_vars->[_TOKEN_] = $token;
+ $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
+ $code_bias += $delta_bias;
+ }
- # Second part goes without intermediate blank
- $store_token->($rtoken_vars);
- next;
- }
- }
- }
- else {
+ if ( $type eq 'k' ) {
- # this is a new single token qw -
- # store with possible preceding blank
- $store_token_and_space->(
- $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
- );
- next;
- }
+ # allow certain control keywords to stand out
+ if ( $next_nonblank_type eq 'k'
+ && $is_last_next_redo_return{$token} )
+ {
+ $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
}
- } ## end if ( $type eq 'q' )
- # Modify certain tokens here for whitespace
- # The following is not yet done, but could be:
- # sub (x x x)
- elsif ( $type =~ /^[wit]$/ ) {
+ # Don't break after keyword my. This is a quick fix for a
+ # rare problem with perl. An example is this line from file
+ # Container.pm:
- # Examples: <<snippets/space1.in>>
- # change '$ var' to '$var' etc
- # '-> new' to '->new'
- if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
- $token =~ s/\s*//g;
- $rtoken_vars->[_TOKEN_] = $token;
+ # foreach my $question( Debian::DebConf::ConfigDb::gettree(
+ # $this->{'question'} ) )
+
+ if ( $token eq 'my' ) {
+ $bond_str = NO_BREAK;
}
- # Split identifiers with leading arrows, inserting blanks if
- # necessary. It is easier and safer here than in the
- # tokenizer. For example '->new' becomes two tokens, '->' and
- # 'new' with a possible blank between.
- #
- # Note: there is a related patch in sub set_whitespace_flags
- if ( $token =~ /^\-\>(.*)$/ && $1 ) {
- my $token_save = $1;
- my $type_save = $type;
+ }
- # store a blank to left of arrow if necessary
- my $Kprev = $self->K_previous_nonblank($KK);
- if ( defined($Kprev)
- && $rLL->[$Kprev]->[_TYPE_] ne 'b'
- && $rOpts_add_whitespace
- && $want_left_space{'->'} == WS_YES )
- {
- my $rcopy =
- copy_token_as_type( $rtoken_vars, 'b', ' ' );
- $store_token->($rcopy);
- }
+ # good to break before 'if', 'unless', etc
+ if ( $is_if_brace_follower{$next_nonblank_token} ) {
+ $bond_str = VERY_WEAK;
+ }
- # then store the arrow
- my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
- $store_token->($rcopy);
+ if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
- # store a blank after the arrow if requested
- # added for issue git #33
- if ( $want_right_space{'->'} == WS_YES ) {
- my $rcopy =
- copy_token_as_type( $rtoken_vars, 'b', ' ' );
- $store_token->($rcopy);
- }
+ # FIXME: needs more testing
+ if ( $is_keyword_returning_list{$next_nonblank_token} ) {
+ $bond_str = $list_str if ( $bond_str > $list_str );
+ }
- # then reset the current token to be the remainder,
- # and reset the whitespace flag according to the arrow
- $token = $rtoken_vars->[_TOKEN_] = $token_save;
- $type = $rtoken_vars->[_TYPE_] = $type_save;
- $store_token->($rtoken_vars);
- next;
+ # keywords like 'unless', 'if', etc, within statements
+ # make good breaks
+ if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
+ $bond_str = VERY_WEAK / 1.05;
}
+ }
- if ( $token =~ /$ANYSUB_PATTERN/ ) {
+ # try not to break before a comma-arrow
+ elsif ( $next_nonblank_type eq '=>' ) {
+ if ( $bond_str < STRONG ) { $bond_str = STRONG }
+ }
- # -spp = 0 : no space before opening prototype paren
- # -spp = 1 : stable (follow input spacing)
- # -spp = 2 : always space before opening prototype paren
- my $spp = $rOpts->{'space-prototype-paren'};
- if ( defined($spp) ) {
- if ( $spp == 0 ) { $token =~ s/\s+\(/\(/; }
- elsif ( $spp == 2 ) { $token =~ s/\(/ (/; }
- }
+ #---------------------------------------------------------------
+ # Additional hardwired NOBREAK rules
+ #---------------------------------------------------------------
- # one space max, and no tabs
- $token =~ s/\s+/ /g;
- $rtoken_vars->[_TOKEN_] = $token;
- }
+ # map1.t -- correct for a quirk in perl
+ if ( $token eq '('
+ && $next_nonblank_type eq 'i'
+ && $last_nonblank_type eq 'k'
+ && $is_sort_map_grep{$last_nonblank_token} )
- # trim identifiers of trailing blanks which can occur
- # under some unusual circumstances, such as if the
- # identifier 'witch' has trailing blanks on input here:
- #
- # sub
- # witch
- # () # prototype may be on new line ...
- # ...
- if ( $type eq 'i' ) {
- $token =~ s/\s+$//g;
- $rtoken_vars->[_TOKEN_] = $token;
- }
+ # /^(sort|map|grep)$/ )
+ {
+ $bond_str = NO_BREAK;
}
- # change 'LABEL :' to 'LABEL:'
- elsif ( $type eq 'J' ) {
- $token =~ s/\s+//g;
- $rtoken_vars->[_TOKEN_] = $token;
+ # extrude.t: do not break before paren at:
+ # -l pid_filename(
+ if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
+ $bond_str = NO_BREAK;
}
- # patch to add space to something like "x10"
- # This avoids having to split this token in the pre-tokenizer
- elsif ( $type eq 'n' ) {
- if ( $token =~ /^x\d+/ ) {
- $token =~ s/x/x /;
- $rtoken_vars->[_TOKEN_] = $token;
- }
- }
+ # in older version of perl, use strict can cause problems with
+ # breaks before bare words following opening parens. For example,
+ # this will fail under older versions if a break is made between
+ # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
+ # command"); close MAIL;
+ if ( $type eq '{' ) {
- # check a quote for problems
- elsif ( $type eq 'Q' ) {
- $check_Q->( $KK, $Kfirst, $input_line_number );
+ if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
+
+ # but it's fine to break if the word is followed by a '=>'
+ # or if it is obviously a sub call
+ my $i_next_next_nonblank = $i_next_nonblank + 1;
+ my $next_next_type = $types_to_go[$i_next_next_nonblank];
+ if ( $next_next_type eq 'b'
+ && $i_next_nonblank < $max_index_to_go )
+ {
+ $i_next_next_nonblank++;
+ $next_next_type = $types_to_go[$i_next_next_nonblank];
+ }
+
+ # We'll check for an old breakpoint and keep a leading
+ # bareword if it was that way in the input file.
+ # Presumably it was ok that way. For example, the
+ # following would remain unchanged:
+ #
+ # @months = (
+ # January, February, March, April,
+ # May, June, July, August,
+ # September, October, November, December,
+ # );
+ #
+ # This should be sufficient:
+ if (
+ !$old_breakpoint_to_go[$i]
+ && ( $next_next_type eq ','
+ || $next_next_type eq '}' )
+ )
+ {
+ $bond_str = NO_BREAK;
+ }
+ }
}
- # handle semicolons
- elsif ( $type eq ';' ) {
+ # Do not break between a possible filehandle and a ? or / and do
+ # not introduce a break after it if there is no blank
+ # (extrude.t)
+ elsif ( $type eq 'Z' ) {
- # Remove unnecessary semicolons, but not after bare
- # blocks, where it could be unsafe if the brace is
- # mistokenized.
+ # don't break..
if (
- $rOpts->{'delete-semicolons'}
- && (
- (
- $last_nonblank_type eq '}'
- && (
- $is_block_without_semicolon{
- $last_nonblank_block_type}
- || $last_nonblank_block_type =~ /$SUB_PATTERN/
- || $last_nonblank_block_type =~ /^\w+:$/ )
- )
- || $last_nonblank_type eq ';'
+
+ # if there is no blank and we do not want one. Examples:
+ # print $x++ # do not break after $x
+ # print HTML"HELLO" # break ok after HTML
+ (
+ $next_type ne 'b'
+ && defined( $want_left_space{$next_type} )
+ && $want_left_space{$next_type} == WS_NO
)
+
+ # or we might be followed by the start of a quote
+ || $next_nonblank_type =~ /^[\/\?]$/
)
{
-
- # This looks like a deletable semicolon, but even if a
- # semicolon can be deleted it is necessarily best to do so.
- # We apply these additional rules for deletion:
- # - Always ok to delete a ';' at the end of a line
- # - Never delete a ';' before a '#' because it would
- # promote it to a block comment.
- # - If a semicolon is not at the end of line, then only
- # delete if it is followed by another semicolon or closing
- # token. This includes the comment rule. It may take
- # two passes to get to a final state, but it is a little
- # safer. For example, keep the first semicolon here:
- # eval { sub bubba { ok(0) }; ok(0) } || ok(1);
- # It is not required but adds some clarity.
- my $ok_to_delete = 1;
- if ( $KK < $Klast ) {
- my $Kn = $self->K_next_nonblank($KK);
- if ( defined($Kn) && $Kn <= $Klast ) {
- my $next_nonblank_token_type =
- $rLL->[$Kn]->[_TYPE_];
- $ok_to_delete = $next_nonblank_token_type eq ';'
- || $next_nonblank_token_type eq '}';
- }
- }
-
- # do not delete only nonblank token in a file
- else {
- my $Kn = $self->K_next_nonblank($KK);
- $ok_to_delete = defined($Kn) || $nonblank_token_count;
- }
-
- if ($ok_to_delete) {
- $self->note_deleted_semicolon($input_line_number);
- next;
- }
- else {
- write_logfile_entry("Extra ';'\n");
- }
+ $bond_str = NO_BREAK;
}
}
- elsif ($type_sequence) {
+ # Breaking before a ? before a quote can cause trouble if
+ # they are not separated by a blank.
+ # Example: a syntax error occurs if you break before the ? here
+ # my$logic=join$all?' && ':' || ',@regexps;
+ # From: Professional_Perl_Programming_Code/multifind.pl
+ if ( $next_nonblank_type eq '?' ) {
+ $bond_str = NO_BREAK
+ if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
+ }
- if ( $is_opening_token{$token} ) {
- my $seqno_parent = $seqno_stack{ $depth_next - 1 };
- $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
- push @{ $rchildren_of_seqno->{$seqno_parent} },
- $type_sequence;
- $rparent_of_seqno->{$type_sequence} = $seqno_parent;
- $seqno_stack{$depth_next} = $type_sequence;
- $KK_stack{$depth_next} = $KK;
- $depth_next++;
- }
- elsif ( $is_closing_token{$token} ) {
- $depth_next--;
+ # Breaking before a . followed by a number
+ # can cause trouble if there is no intervening space
+ # Example: a syntax error occurs if you break before the .2 here
+ # $str .= pack($endian.2, ensurrogate($ord));
+ # From: perl58/Unicode.pm
+ elsif ( $next_nonblank_type eq '.' ) {
+ $bond_str = NO_BREAK
+ if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
+ }
- # keep track of broken lists for later formatting
- my $seqno_test = $seqno_stack{$depth_next};
- my $KK_open = $KK_stack{$depth_next};
- my $seqno_outer = $seqno_stack{ $depth_next - 1 };
- if ( defined($seqno_test)
- && defined($KK_open)
- && $seqno_test == $type_sequence )
- {
- my $lx_open = $rLL->[$KK_open]->[_LINE_INDEX_];
- my $lx_close = $rLL->[$KK]->[_LINE_INDEX_];
- if ( $lx_open < $lx_close ) {
- $ris_broken_container->{$type_sequence} =
- $lx_close - $lx_open;
- if ( defined($seqno_outer) ) {
- $rhas_broken_container->{$seqno_outer} = 1;
- }
- }
- }
+ my $bond_str_2 = $bond_str;
- # Insert a tentative missing semicolon if the next token is
- # a closing block brace
- if (
- $type eq '}'
- && $token eq '}'
+ #---------------------------------------------------------------
+ # End of hardwired rules
+ #---------------------------------------------------------------
- # not preceded by a ';'
- && $last_nonblank_type ne ';'
+ #---------------------------------------------------------------
+ # Bond Strength Section 3:
+ # Apply table rules. These have priority over the above
+ # hardwired rules.
+ #---------------------------------------------------------------
- # and this is not a VERSION stmt (is all one line, we are not
- # inserting semicolons on one-line blocks)
- && $CODE_type ne 'VER'
+ my $tabulated_bond_str;
+ my $ltype = $type;
+ my $rtype = $next_nonblank_type;
+ if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token }
+ if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) {
+ $rtype = $next_nonblank_type . $next_nonblank_token;
+ }
- # and we are allowed to add semicolons
- && $rOpts->{'add-semicolons'}
- )
- {
- $add_phantom_semicolon->($KK);
- }
- }
+ if ( $binary_bond_strength{$ltype}{$rtype} ) {
+ $bond_str = $binary_bond_strength{$ltype}{$rtype};
+ $tabulated_bond_str = $bond_str;
}
- # Store this token with possible previous blank
- $store_token_and_space->(
- $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
- );
+ if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
+ $bond_str = NO_BREAK;
+ $tabulated_bond_str = $bond_str;
+ }
+ my $bond_str_3 = $bond_str;
- } # End token loop
- } # End line loop
+ # If the hardwired rules conflict with the tabulated bond
+ # strength then there is an inconsistency that should be fixed
+ $DEBUG_BOND
+ && $tabulated_bond_str
+ && $bond_str_1
+ && $bond_str_1 != $bond_str_2
+ && $bond_str_2 != $tabulated_bond_str
+ && do {
+ print STDERR
+"BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
+ };
- # Reset memory to be the new array
- $self->[_rLL_] = $rLL_new;
- $self->set_rLL_max_index();
- $self->[_K_opening_container_] = $K_opening_container;
- $self->[_K_closing_container_] = $K_closing_container;
- $self->[_K_opening_ternary_] = $K_opening_ternary;
- $self->[_K_closing_ternary_] = $K_closing_ternary;
- $self->[_rK_phantom_semicolons_] = $rK_phantom_semicolons;
- $self->[_rtype_count_by_seqno_] = $rtype_count_by_seqno;
- $self->[_ris_broken_container_] = $ris_broken_container;
- $self->[_rhas_broken_container_] = $rhas_broken_container;
- $self->[_rparent_of_seqno_] = $rparent_of_seqno;
- $self->[_rchildren_of_seqno_] = $rchildren_of_seqno;
+ #-----------------------------------------------------------------
+ # Bond Strength Section 4:
+ # Modify strengths of certain tokens which often occur in sequence
+ # by adding a small bias to each one in turn so that the breaks
+ # occur from left to right.
+ #
+ # Note that we only changing strengths by small amounts here,
+ # and usually increasing, so we should not be altering any NO_BREAKs.
+ # Other routines which check for NO_BREAKs will use a tolerance
+ # of one to avoid any problem.
+ #-----------------------------------------------------------------
- # make sure the new array looks okay
- $self->check_token_array();
+ # The bias tables use special keys
+ my $left_key = bias_table_key( $type, $token );
+ my $right_key =
+ bias_table_key( $next_nonblank_type, $next_nonblank_token );
- # reset the token limits of each line
- $self->resync_lines_and_tokens();
+ # add any bias set by sub scan_list at old comma break points.
+ if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] }
- return;
-}
+ # bias left token
+ elsif ( defined( $bias{$left_key} ) ) {
+ if ( !$want_break_before{$left_key} ) {
+ $bias{$left_key} += $delta_bias;
+ $bond_str += $bias{$left_key};
+ }
+ }
-{ ## begin closure scan_comments
+ # bias right token
+ if ( defined( $bias{$right_key} ) ) {
+ if ( $want_break_before{$right_key} ) {
- # This routine is called once per file at the start of processing to
- # make a pass through the lines, looking at lines of CODE and identifying
- # special processing needs, such format skipping sections marked by
- # special comments.
+ # for leading '.' align all but 'short' quotes; the idea
+ # is to not place something like "\n" on a single line.
+ if ( $right_key eq '.' ) {
+ unless (
+ $last_nonblank_type eq '.'
+ && ( $token_length <=
+ $rOpts_short_concatenation_item_length )
+ && ( !$is_closing_token{$token} )
+ )
+ {
+ $bias{$right_key} += $delta_bias;
+ }
+ }
+ else {
+ $bias{$right_key} += $delta_bias;
+ }
+ $bond_str += $bias{$right_key};
+ }
+ }
+ my $bond_str_4 = $bond_str;
- my $Last_line_had_side_comment;
- my $In_format_skipping_section;
- my $Saw_VERSION_in_this_file;
+ #---------------------------------------------------------------
+ # Bond Strength Section 5:
+ # Fifth Approximation.
+ # Take nesting depth into account by adding the nesting depth
+ # to the bond strength.
+ #---------------------------------------------------------------
+ my $strength;
- sub scan_comments {
- my $self = shift;
- my $rlines = $self->[_rlines_];
+ if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
+ if ( $total_nesting_depth > 0 ) {
+ $strength = $bond_str + $total_nesting_depth;
+ }
+ else {
+ $strength = $bond_str;
+ }
+ }
+ else {
+ $strength = NO_BREAK;
- $Last_line_had_side_comment = undef;
- $In_format_skipping_section = undef;
- $Saw_VERSION_in_this_file = undef;
+ # For critical code such as lines with here targets we must
+ # be absolutely sure that we do not allow a break. So for
+ # these the nobreak flag exceeds 1 as a signal. Otherwise we
+ # can run into trouble when small tolerances are added.
+ $strength += 1 if ( $nobreak_to_go[$i] > 1 );
+ }
- # Loop over all lines
- foreach my $line_of_tokens ( @{$rlines} ) {
- my $line_type = $line_of_tokens->{_line_type};
- next unless ( $line_type eq 'CODE' );
- my $CODE_type = $self->get_CODE_type($line_of_tokens);
- $line_of_tokens->{_code_type} = $CODE_type;
- }
- return;
- }
+ #---------------------------------------------------------------
+ # Bond Strength Section 6:
+ # Sixth Approximation. Welds.
+ #---------------------------------------------------------------
- sub get_CODE_type {
- my ( $self, $line_of_tokens ) = @_;
+ # Do not allow a break within welds,
+ if ( $self->weld_len_right_to_go($i) ) { $strength = NO_BREAK }
- # We are looking at a line of code and setting a flag to
- # describe any special processing that it requires
+ # But encourage breaking after opening welded tokens
+ elsif ($self->weld_len_left_to_go($i)
+ && $is_opening_token{$token} )
+ {
+ $strength -= 1;
+ }
- # Possible CODE_types are as follows.
- # 'BL' = Blank Line
- # 'VB' = Verbatim - line goes out verbatim
- # 'IO' = Indent Only - line goes out unchanged except for indentation
- # 'NIN' = No Internal Newlines - line does not get broken
- # 'HSC'=Hanging Side Comment - fix this hanging side comment
- # 'BC'=Block Comment - an ordinary full line comment
- # 'SBC'=Static Block Comment - a block comment which does not get
- # indented
- # 'SBCX'=Static Block Comment Without Leading Space
- # 'VER'=VERSION statement
- # '' or (undefined) - no restructions
+ # always break after side comment
+ if ( $type eq '#' ) { $strength = 0 }
- my $rLL = $self->[_rLL_];
- my $Klimit = $self->[_Klimit_];
+ $bond_strength_to_go[$i] = $strength;
- my $rOpts_add_newlines = $rOpts->{'add-newlines'};
- my $rOpts_format_skipping = $rOpts->{'format-skipping'};
+ $DEBUG_BOND && do {
+ my $str = substr( $token, 0, 15 );
+ $str .= ' ' x ( 16 - length($str) );
+ print STDOUT
+"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
+ };
+ } ## end main loop
+ return;
+ } ## end sub set_bond_strengths
+} ## end closure set_bond_strengths
- my $CODE_type = $rOpts->{'indent-only'} ? 'IO' : "";
- my $no_internal_newlines = 1 - $rOpts_add_newlines;
- if ( !$CODE_type && $no_internal_newlines ) { $CODE_type = 'NIN' }
+sub bad_pattern {
- # extract what we need for this line..
+ # See if a pattern will compile. We have to use a string eval here,
+ # but it should be safe because the pattern has been constructed
+ # by this program.
+ my ($pattern) = @_;
+ eval "'##'=~/$pattern/";
+ return $@;
+}
- my $input_line_number = $line_of_tokens->{_line_number};
+{ ## begin closure prepare_cuddled_block_types
- my $rK_range = $line_of_tokens->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
- my $jmax = -1;
- if ( defined($Kfirst) ) { $jmax = $Klast - $Kfirst }
- my $input_line = $line_of_tokens->{_line_text};
+ my %no_cuddle;
- my $is_static_block_comment = 0;
+ # Add keywords here which really should not be cuddled
+ BEGIN {
+ my @q = qw(if unless for foreach while);
+ @no_cuddle{@q} = (1) x scalar(@q);
+ }
- # Handle a continued quote..
- if ( $line_of_tokens->{_starting_in_quote} ) {
+ sub prepare_cuddled_block_types {
- # A line which is entirely a quote or pattern must go out
- # verbatim. Note: the \n is contained in $input_line.
- if ( $jmax <= 0 ) {
- if ( ( $input_line =~ "\t" ) ) {
- $self->note_embedded_tab($input_line_number);
- }
- $Last_line_had_side_comment = 0;
- return 'VB';
- }
- }
+ # the cuddled-else style, if used, is controlled by a hash that
+ # we construct here
- my $is_block_comment =
- ( $jmax == 0 && $rLL->[$Kfirst]->[_TYPE_] eq '#' );
+ # Include keywords here which should not be cuddled
- # Write line verbatim if we are in a formatting skip section
- if ($In_format_skipping_section) {
- $Last_line_had_side_comment = 0;
+ my $cuddled_string = "";
+ if ( $rOpts->{'cuddled-else'} ) {
- # Note: extra space appended to comment simplifies pattern matching
- if ( $is_block_comment
- && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
- /$format_skipping_pattern_end/ )
- {
- $In_format_skipping_section = 0;
- write_logfile_entry("Exiting formatting skip section\n");
+ # set the default
+ $cuddled_string = 'elsif else continue catch finally'
+ unless ( $rOpts->{'cuddled-block-list-exclusive'} );
+
+ # This is the old equivalent but more complex version
+ # $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
+
+ # Add users other blocks to be cuddled
+ my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
+ if ($cuddled_block_list) {
+ $cuddled_string .= " " . $cuddled_block_list;
}
- return 'FS';
- }
- # See if we are entering a formatting skip section
- if ( $rOpts_format_skipping
- && $is_block_comment
- && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
- /$format_skipping_pattern_begin/ )
- {
- $In_format_skipping_section = 1;
- write_logfile_entry("Entering formatting skip section\n");
- $Last_line_had_side_comment = 0;
- return 'FS';
}
- # ignore trailing blank tokens (they will get deleted later)
- if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
- $jmax--;
- }
+ # If we have a cuddled string of the form
+ # 'try-catch-finally'
- # Handle a blank line..
- if ( $jmax < 0 ) {
- $Last_line_had_side_comment = 0;
- return 'BL';
- }
+ # we want to prepare a hash of the form
- # see if this is a static block comment (starts with ## by default)
- my $is_static_block_comment_without_leading_space = 0;
- if ( $is_block_comment
- && $rOpts->{'static-block-comments'}
- && $input_line =~ /$static_block_comment_pattern/ )
- {
- $is_static_block_comment = 1;
- $is_static_block_comment_without_leading_space =
- substr( $input_line, 0, 1 ) eq '#';
- }
+ # $rcuddled_block_types = {
+ # 'try' => {
+ # 'catch' => 1,
+ # 'finally' => 1
+ # },
+ # };
- # Check for comments which are line directives
- # Treat exactly as static block comments without leading space
- # reference: perlsyn, near end, section Plain Old Comments (Not!)
- # example: '# line 42 "new_filename.plx"'
- if (
- $is_block_comment
- && $input_line =~ /^\# \s*
- line \s+ (\d+) \s*
- (?:\s("?)([^"]+)\2)? \s*
- $/x
- )
- {
- $is_static_block_comment = 1;
- $is_static_block_comment_without_leading_space = 1;
- }
+ # use -dcbl to dump this hash
- # look for hanging side comment
- if (
- $is_block_comment
- && $Last_line_had_side_comment # last line had side comment
- && $input_line =~ /^\s/ # there is some leading space
- && !$is_static_block_comment # do not make static comment hanging
- && $rOpts->{'hanging-side-comments'} # user is allowing
- # hanging side comments
- # like this
- )
- {
- $Last_line_had_side_comment = 1;
- return 'HSC';
- }
+ # Multiple such strings are input as a space or comma separated list
- # remember if this line has a side comment
- $Last_line_had_side_comment =
- ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq '#' );
+ # If we get two lists with the same leading type, such as
+ # -cbl = "-try-catch-finally -try-catch-otherwise"
+ # then they will get merged as follows:
+ # $rcuddled_block_types = {
+ # 'try' => {
+ # 'catch' => 1,
+ # 'finally' => 2,
+ # 'otherwise' => 1,
+ # },
+ # };
+ # This will allow either type of chain to be followed.
- # Handle a block (full-line) comment..
- if ($is_block_comment) {
+ $cuddled_string =~ s/,/ /g; # allow space or comma separated lists
+ my @cuddled_strings = split /\s+/, $cuddled_string;
- # TRIM COMMENTS -- This could be turned off as a option
- $rLL->[$Kfirst]->[_TOKEN_] =~ s/\s*$//; # trim right end
+ $rcuddled_block_types = {};
- if ($is_static_block_comment_without_leading_space) {
- return 'SBCX';
- }
- elsif ($is_static_block_comment) {
- return 'SBC';
- }
- else {
- return 'BC';
- }
- }
+ # process each dash-separated string...
+ my $string_count = 0;
+ foreach my $string (@cuddled_strings) {
+ next unless $string;
+ my @words = split /-+/, $string; # allow multiple dashes
- # Patch needed for MakeMaker. Do not break a statement
- # in which $VERSION may be calculated. See MakeMaker.pm;
- # this is based on the coding in it.
- # The first line of a file that matches this will be eval'd:
- # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
- # Examples:
- # *VERSION = \'1.01';
- # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
- # We will pass such a line straight through without breaking
- # it unless -npvl is used.
+ # we could look for and report possible errors here...
+ next unless ( @words > 0 );
- # Patch for problem reported in RT #81866, where files
- # had been flattened into a single line and couldn't be
- # tidied without -npvl. There are two parts to this patch:
- # First, it is not done for a really long line (80 tokens for now).
- # Second, we will only allow up to one semicolon
- # before the VERSION. We need to allow at least one semicolon
- # for statements like this:
- # require Exporter; our $VERSION = $Exporter::VERSION;
- # where both statements must be on a single line for MakeMaker
+ # allow either '-continue' or *-continue' for arbitrary starting type
+ my $start = '*';
- my $is_VERSION_statement = 0;
- if ( !$Saw_VERSION_in_this_file
- && $jmax < 80
- && $input_line =~
- /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
- {
- $Saw_VERSION_in_this_file = 1;
- write_logfile_entry("passing VERSION line; -npvl deactivates\n");
+ # a single word without dashes is a secondary block type
+ if ( @words > 1 ) {
+ $start = shift @words;
+ }
- # This code type has lower priority than others
- $CODE_type = 'VER' unless ($CODE_type);
+ # always make an entry for the leading word. If none follow, this
+ # will still prevent a wildcard from matching this word.
+ if ( !defined( $rcuddled_block_types->{$start} ) ) {
+ $rcuddled_block_types->{$start} = {};
+ }
+
+ # The count gives the original word order in case we ever want it.
+ $string_count++;
+ my $word_count = 0;
+ foreach my $word (@words) {
+ next unless $word;
+ if ( $no_cuddle{$word} ) {
+ Warn(
+"## Ignoring keyword '$word' in -cbl; does not seem right\n"
+ );
+ next;
+ }
+ $word_count++;
+ $rcuddled_block_types->{$start}->{$word} =
+ 1; #"$string_count.$word_count";
+
+ # git#9: Remove this word from the list of desired one-line
+ # blocks
+ $want_one_line_block{$word} = 0;
+ }
}
- return $CODE_type;
+ return;
}
-} ## end closure scan_comments
-
-sub find_nested_pairs {
- my $self = shift;
+} ## begin closure prepare_cuddled_block_types
- # This routine is called once per file to do preliminary work needed for
- # the --weld-nested option. This information is also needed for adding
- # semicolons.
+sub dump_cuddled_block_list {
+ my ($fh) = @_;
- my $rLL = $self->[_rLL_];
- return unless ( defined($rLL) && @{$rLL} );
+ # ORIGINAL METHOD: Here is the format of the cuddled block type hash
+ # which controls this routine
+ # my $rcuddled_block_types = {
+ # 'if' => {
+ # 'else' => 1,
+ # 'elsif' => 1
+ # },
+ # 'try' => {
+ # 'catch' => 1,
+ # 'finally' => 1
+ # },
+ # };
- # We define an array of pairs of nested containers
- my @nested_pairs;
+ # SIMPLFIED METHOD: the simplified method uses a wildcard for
+ # the starting block type and puts all cuddled blocks together:
+ # my $rcuddled_block_types = {
+ # '*' => {
+ # 'else' => 1,
+ # 'elsif' => 1
+ # 'catch' => 1,
+ # 'finally' => 1
+ # },
+ # };
- # We also set the following hash values to identify container pairs for
- # which the opening and closing tokens are adjacent in the token stream:
- # $rpaired_to_inner_container->{$seqno_out}=$seqno_in where $seqno_out and
- # $seqno_in are the seqence numbers of the outer and inner containers of
- # the pair We need these later to decide if we can insert a missing
- # semicolon
- my $rpaired_to_inner_container = {};
+ # Both methods work, but the simplified method has proven to be adequate and
+ # easier to manage.
- # This local hash remembers if an outer container has a close following
- # inner container;
- # The key is the outer sequence number
- # The value is the token_hash of the inner container
+ my $cuddled_string = $rOpts->{'cuddled-block-list'};
+ $cuddled_string = '' unless $cuddled_string;
- my %has_close_following_opening;
+ my $flags = "";
+ $flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
+ $flags .= " -cbl='$cuddled_string'";
- # Names of calling routines can either be marked as 'i' or 'w',
- # and they may invoke a sub call with an '->'. We will consider
- # any consecutive string of such types as a single unit when making
- # weld decisions. We also allow a leading !
- my $is_name_type = {
- 'i' => 1,
- 'w' => 1,
- 'U' => 1,
- '->' => 1,
- '!' => 1,
- };
+ unless ( $rOpts->{'cuddled-else'} ) {
+ $flags .= "\nNote: You must specify -ce to generate a cuddled hash";
+ }
- my $is_name = sub {
- my $type = shift;
- return $type && $is_name_type->{$type};
- };
+ $fh->print(<<EOM);
+------------------------------------------------------------------------
+Hash of cuddled block types prepared for a run with these parameters:
+ $flags
+------------------------------------------------------------------------
+EOM
- my $last_container;
- my $last_last_container;
- my $last_nonblank_token_vars;
- my $last_count;
+ use Data::Dumper;
+ $fh->print( Dumper($rcuddled_block_types) );
- my $nonblank_token_count = 0;
+ $fh->print(<<EOM);
+------------------------------------------------------------------------
+EOM
+ return;
+}
- # loop over all tokens
- foreach my $rtoken_vars ( @{$rLL} ) {
+sub make_static_block_comment_pattern {
- my $type = $rtoken_vars->[_TYPE_];
+ # create the pattern used to identify static block comments
+ $static_block_comment_pattern = '^\s*##';
- next if ( $type eq 'b' );
+ # allow the user to change it
+ if ( $rOpts->{'static-block-comment-prefix'} ) {
+ my $prefix = $rOpts->{'static-block-comment-prefix'};
+ $prefix =~ s/^\s*//;
+ my $pattern = $prefix;
- # long identifier-like items are counted as a single item
- $nonblank_token_count++
- unless ( $is_name->($type)
- && $is_name->( $last_nonblank_token_vars->[_TYPE_] ) );
-
- my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- if ($type_sequence) {
-
- my $token = $rtoken_vars->[_TOKEN_];
-
- if ( $is_opening_token{$token} ) {
-
- # following previous opening token ...
- if ( $last_container
- && $is_opening_token{ $last_container->[_TOKEN_] } )
- {
-
- # adjacent to this one
- my $tok_diff = $nonblank_token_count - $last_count;
-
- my $last_tok = $last_nonblank_token_vars->[_TOKEN_];
-
- if ( $tok_diff == 1
- || $tok_diff == 2 && $last_container->[_TOKEN_] eq '(' )
- {
-
- # remember this pair...
- my $outer_seqno = $last_container->[_TYPE_SEQUENCE_];
- my $inner_seqno = $type_sequence;
- $has_close_following_opening{$outer_seqno} =
- $rtoken_vars;
- }
- }
+ # user may give leading caret to force matching left comments only
+ if ( $prefix !~ /^\^#/ ) {
+ if ( $prefix !~ /^#/ ) {
+ Die(
+"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
+ );
}
+ $pattern = '^\s*' . $prefix;
+ }
+ if ( bad_pattern($pattern) ) {
+ Die(
+"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
+ );
+ }
+ $static_block_comment_pattern = $pattern;
+ }
+ return;
+}
- elsif ( $is_closing_token{$token} ) {
-
- # if the corresponding opening token had an adjacent opening
- if ( $has_close_following_opening{$type_sequence}
- && $is_closing_token{ $last_container->[_TOKEN_] }
- && $has_close_following_opening{$type_sequence}
- ->[_TYPE_SEQUENCE_] == $last_container->[_TYPE_SEQUENCE_] )
- {
+sub make_format_skipping_pattern {
+ my ( $opt_name, $default ) = @_;
+ my $param = $rOpts->{$opt_name};
+ unless ($param) { $param = $default }
+ $param =~ s/^\s*//;
+ if ( $param !~ /^#/ ) {
+ Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
+ }
+ my $pattern = '^' . $param . '\s';
+ if ( bad_pattern($pattern) ) {
+ Die(
+"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
+ );
+ }
+ return $pattern;
+}
- # The closing weld tokens must be adjacent
- # NOTE: so intermediate commas and semicolons
- # can currently block a weld. This is something
- # that could be fixed in the future by including
- # a flag to delete un-necessary commas and semicolons.
- my $tok_diff = $nonblank_token_count - $last_count;
+sub make_non_indenting_brace_pattern {
- if ( $tok_diff == 1 ) {
+ # Create the pattern used to identify static side comments.
+ # Note that we are ending the pattern in a \s. This will allow
+ # the pattern to be followed by a space and some text, or a newline.
+ # The pattern is used in sub 'non_indenting_braces'
+ $non_indenting_brace_pattern = '^#<<<\s';
- # This is a closely nested pair ..
- my $inner_seqno = $last_container->[_TYPE_SEQUENCE_];
- my $outer_seqno = $type_sequence;
- $rpaired_to_inner_container->{$outer_seqno} =
- $inner_seqno;
+ # allow the user to change it
+ if ( $rOpts->{'non-indenting-brace-prefix'} ) {
+ my $prefix = $rOpts->{'non-indenting-brace-prefix'};
+ $prefix =~ s/^\s*//;
+ if ( $prefix !~ /^#/ ) {
+ Die("ERROR: the -nibp parameter '$prefix' must begin with '#'\n");
+ }
+ my $pattern = '^' . $prefix . '\s';
+ if ( bad_pattern($pattern) ) {
+ Die(
+"ERROR: the -nibp prefix '$prefix' causes the invalid regex '$pattern'\n"
+ );
+ }
+ $non_indenting_brace_pattern = $pattern;
+ }
+ return;
+}
- push @nested_pairs, [ $inner_seqno, $outer_seqno ];
- }
- }
- }
+sub make_closing_side_comment_list_pattern {
- $last_last_container = $last_container;
- $last_container = $rtoken_vars;
- $last_count = $nonblank_token_count;
- }
- $last_nonblank_token_vars = $rtoken_vars;
+ # turn any input list into a regex for recognizing selected block types
+ $closing_side_comment_list_pattern = '^\w+';
+ if ( defined( $rOpts->{'closing-side-comment-list'} )
+ && $rOpts->{'closing-side-comment-list'} )
+ {
+ $closing_side_comment_list_pattern =
+ make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
}
- $self->[_rnested_pairs_] = \@nested_pairs;
- $self->[_rpaired_to_inner_container_] = $rpaired_to_inner_container;
return;
}
-sub Debug_dump_tokens {
+sub make_sub_matching_pattern {
- # a debug routine, not normally used
- my ( $self, $msg ) = @_;
- my $rLL = $self->[_rLL_];
- my $nvars = @{$rLL};
- print STDERR "$msg\n";
- print STDERR "ntokens=$nvars\n";
- print STDERR "K\t_TOKEN_\t_TYPE_\n";
- my $K = 0;
+ # Patterns for standardizing matches to block types for regular subs and
+ # anonymous subs. Examples
+ # 'sub process' is a named sub
+ # 'sub ::m' is a named sub
+ # 'sub' is an anonymous sub
+ # 'sub:' is a label, not a sub
+ # 'substr' is a keyword
+ $SUB_PATTERN = '^sub\s+(::|\w)'; # match normal sub
+ $ASUB_PATTERN = '^sub$'; # match anonymous sub
+ $ANYSUB_PATTERN = '^sub\b'; # match either type of sub
- foreach my $item ( @{$rLL} ) {
- print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
- $K++;
+ # Note (see also RT #133130): These patterns are used by
+ # sub make_block_pattern, which is used for making most patterns.
+ # So this sub needs to be called before other pattern-making routines.
+
+ if ( $rOpts->{'sub-alias-list'} ) {
+
+ # Note that any 'sub-alias-list' has been preprocessed to
+ # be a trimmed, space-separated list which includes 'sub'
+ # for example, it might be 'sub method fun'
+ my $sub_alias_list = $rOpts->{'sub-alias-list'};
+ $sub_alias_list =~ s/\s+/\|/g;
+ $SUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
+ $ASUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
+ $ANYSUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
}
return;
}
-sub get_old_line_index {
+sub make_bli_pattern {
- # return index of the original line that token K was on
- my ( $self, $K ) = @_;
- my $rLL = $self->[_rLL_];
- return 0 unless defined($K);
- return $rLL->[$K]->[_LINE_INDEX_];
-}
+ # default list of block types for which -bli would apply
+ my $bli_list_string = 'if else elsif unless while for foreach do : sub';
-sub get_old_line_count {
+ if ( defined( $rOpts->{'brace-left-and-indent-list'} )
+ && $rOpts->{'brace-left-and-indent-list'} )
+ {
+ $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
+ }
- # return number of input lines separating two tokens
- my ( $self, $Kbeg, $Kend ) = @_;
- my $rLL = $self->[_rLL_];
- return 0 unless defined($Kbeg);
- return 0 unless defined($Kend);
- return $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_] + 1;
+ $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
+ return;
}
-sub K_next_code {
- my ( $self, $KK, $rLL ) = @_;
-
- # return the index K of the next nonblank, non-comment token
- return unless ( defined($KK) && $KK >= 0 );
+sub make_keyword_group_list_pattern {
- # use the standard array unless given otherwise
- $rLL = $self->[_rLL_] unless ( defined($rLL) );
- my $Num = @{$rLL};
- my $Knnb = $KK + 1;
- while ( $Knnb < $Num ) {
- if ( !defined( $rLL->[$Knnb] ) ) {
- Fault("Undefined entry for k=$Knnb");
- }
- if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
- && $rLL->[$Knnb]->[_TYPE_] ne '#' )
- {
- return $Knnb;
+ # turn any input list into a regex for recognizing selected block types.
+ # Here are the defaults:
+ $keyword_group_list_pattern = '^(our|local|my|use|require|)$';
+ $keyword_group_list_comment_pattern = '';
+ if ( defined( $rOpts->{'keyword-group-blanks-list'} )
+ && $rOpts->{'keyword-group-blanks-list'} )
+ {
+ my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
+ my @keyword_list;
+ my @comment_list;
+ foreach my $word (@words) {
+ if ( $word =~ /^(BC|SBC)$/ ) {
+ push @comment_list, $word;
+ if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
+ }
+ else {
+ push @keyword_list, $word;
+ }
}
- $Knnb++;
+ $keyword_group_list_pattern =
+ make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
+ $keyword_group_list_comment_pattern =
+ make_block_pattern( '-kgbl', join( ' ', @comment_list ) );
}
return;
}
-sub K_next_nonblank {
- my ( $self, $KK, $rLL ) = @_;
-
- # return the index K of the next nonblank token
- return unless ( defined($KK) && $KK >= 0 );
+sub make_block_brace_vertical_tightness_pattern {
- # use the standard array unless given otherwise
- $rLL = $self->[_rLL_] unless ( defined($rLL) );
- my $Num = @{$rLL};
- my $Knnb = $KK + 1;
- while ( $Knnb < $Num ) {
- if ( !defined( $rLL->[$Knnb] ) ) {
- Fault("Undefined entry for k=$Knnb");
- }
- if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
- $Knnb++;
+ # turn any input list into a regex for recognizing selected block types
+ $block_brace_vertical_tightness_pattern =
+ '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
+ if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
+ && $rOpts->{'block-brace-vertical-tightness-list'} )
+ {
+ $block_brace_vertical_tightness_pattern =
+ make_block_pattern( '-bbvtl',
+ $rOpts->{'block-brace-vertical-tightness-list'} );
}
return;
}
-sub K_previous_code {
+sub make_blank_line_pattern {
- # return the index K of the previous nonblank, non-comment token
- # Call with $KK=undef to start search at the top of the array
- my ( $self, $KK, $rLL ) = @_;
-
- # use the standard array unless given otherwise
- $rLL = $self->[_rLL_] unless ( defined($rLL) );
- my $Num = @{$rLL};
- if ( !defined($KK) ) { $KK = $Num }
- elsif ( $KK > $Num ) {
-
- # The caller should make the first call with KK_new=undef to
- # avoid this error
- Fault(
-"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
- );
+ $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
+ my $key = 'blank-lines-before-closing-block-list';
+ if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
+ $blank_lines_before_closing_block_pattern =
+ make_block_pattern( '-blbcl', $rOpts->{$key} );
}
- my $Kpnb = $KK - 1;
- while ( $Kpnb >= 0 ) {
- if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
- && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
- {
- return $Kpnb;
- }
- $Kpnb--;
+
+ $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
+ $key = 'blank-lines-after-opening-block-list';
+ if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
+ $blank_lines_after_opening_block_pattern =
+ make_block_pattern( '-blaol', $rOpts->{$key} );
}
return;
}
-sub K_previous_nonblank {
+sub make_block_pattern {
- # return index of previous nonblank token before item K;
- # Call with $KK=undef to start search at the top of the array
- my ( $self, $KK, $rLL ) = @_;
+ # given a string of block-type keywords, return a regex to match them
+ # The only tricky part is that labels are indicated with a single ':'
+ # and the 'sub' token text may have additional text after it (name of
+ # sub).
+ #
+ # Example:
+ #
+ # input string: "if else elsif unless while for foreach do : sub";
+ # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
- # use the standard array unless given otherwise
- $rLL = $self->[_rLL_] unless ( defined($rLL) );
- my $Num = @{$rLL};
- if ( !defined($KK) ) { $KK = $Num }
- elsif ( $KK > $Num ) {
+ # Minor Update:
+ #
+ # To distinguish between anonymous subs and named subs, use 'sub' to
+ # indicate a named sub, and 'asub' to indicate an anonymous sub
- # The caller should make the first call with KK_new=undef to
- # avoid this error
- Fault(
-"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
- );
+ my ( $abbrev, $string ) = @_;
+ my @list = split_words($string);
+ my @words = ();
+ my %seen;
+ for my $i (@list) {
+ if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
+ next if $seen{$i};
+ $seen{$i} = 1;
+ if ( $i eq 'sub' ) {
+ }
+ elsif ( $i eq 'asub' ) {
+ }
+ elsif ( $i eq ';' ) {
+ push @words, ';';
+ }
+ elsif ( $i eq '{' ) {
+ push @words, '\{';
+ }
+ elsif ( $i eq ':' ) {
+ push @words, '\w+:';
+ }
+ elsif ( $i =~ /^\w/ ) {
+ push @words, $i;
+ }
+ else {
+ Warn("unrecognized block type $i after $abbrev, ignoring\n");
+ }
}
- my $Kpnb = $KK - 1;
- while ( $Kpnb >= 0 ) {
- if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
- $Kpnb--;
+ my $pattern = '(' . join( '|', @words ) . ')$';
+ my $sub_patterns = "";
+ if ( $seen{'sub'} ) {
+ $sub_patterns .= '|' . $SUB_PATTERN;
}
- return;
+ if ( $seen{'asub'} ) {
+ $sub_patterns .= '|' . $ASUB_PATTERN;
+ }
+ if ($sub_patterns) {
+ $pattern = '(' . $pattern . $sub_patterns . ')';
+ }
+ $pattern = '^' . $pattern;
+ return $pattern;
}
-sub mark_short_nested_blocks {
-
- # This routine looks at the entire file and marks any short nested blocks
- # which should not be broken. The results are stored in the hash
- # $rshort_nested->{$type_sequence}
- # which will be true if the container should remain intact.
- #
- # For example, consider the following line:
-
- # sub cxt_two { sort { $a <=> $b } test_if_list() }
-
- # The 'sort' block is short and nested within an outer sub block.
- # Normally, the existance of the 'sort' block will force the sub block to
- # break open, but this is not always desirable. Here we will set a flag for
- # the sort block to prevent this. To give the user control, we will
- # follow the input file formatting. If either of the blocks is broken in
- # the input file then we will allow it to remain broken. Otherwise we will
- # set a flag to keep it together in later formatting steps.
+sub make_static_side_comment_pattern {
- # The flag which is set here will be checked in two places:
- # 'sub process_line_of_CODE' and 'sub starting_one_line_block'
+ # create the pattern used to identify static side comments
+ $static_side_comment_pattern = '^##';
- my $self = shift;
- return if $rOpts->{'indent-only'};
+ # allow the user to change it
+ if ( $rOpts->{'static-side-comment-prefix'} ) {
+ my $prefix = $rOpts->{'static-side-comment-prefix'};
+ $prefix =~ s/^\s*//;
+ my $pattern = '^' . $prefix;
+ if ( bad_pattern($pattern) ) {
+ Die(
+"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
+ );
+ }
+ $static_side_comment_pattern = $pattern;
+ }
+ return;
+}
- my $rLL = $self->[_rLL_];
- return unless ( defined($rLL) && @{$rLL} );
+sub make_closing_side_comment_prefix {
- return unless ( $rOpts->{'one-line-block-nesting'} );
+ # Be sure we have a valid closing side comment prefix
+ my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
+ my $csc_prefix_pattern;
+ if ( !defined($csc_prefix) ) {
+ $csc_prefix = '## end';
+ $csc_prefix_pattern = '^##\s+end';
+ }
+ else {
+ my $test_csc_prefix = $csc_prefix;
+ if ( $test_csc_prefix !~ /^#/ ) {
+ $test_csc_prefix = '#' . $test_csc_prefix;
+ }
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_closing_container = $self->[_K_closing_container_];
- my $rbreak_container = $self->[_rbreak_container_];
- my $rshort_nested = $self->[_rshort_nested_];
- my $rlines = $self->[_rlines_];
+ # make a regex to recognize the prefix
+ my $test_csc_prefix_pattern = $test_csc_prefix;
- # Variables needed for estimating line lengths
- my $starting_indent;
- my $starting_lentot;
- my $length_tol = 1;
+ # escape any special characters
+ $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
- my $excess_length_to_K = sub {
- my ($K) = @_;
+ $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
- # Estimate the length from the line start to a given token
- my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
- my $excess_length =
- $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
- return ($excess_length);
- };
+ # allow exact number of intermediate spaces to vary
+ $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
- my $is_broken_block = sub {
+ # make sure we have a good pattern
+ # if we fail this we probably have an error in escaping
+ # characters.
- # a block is broken if the input line numbers of the braces differ
- my ($seqno) = @_;
- my $K_opening = $K_opening_container->{$seqno};
- return unless ( defined($K_opening) );
- my $K_closing = $K_closing_container->{$seqno};
- return unless ( defined($K_closing) );
- return $rbreak_container->{$seqno}
- || $rLL->[$K_closing]->[_LINE_INDEX_] !=
- $rLL->[$K_opening]->[_LINE_INDEX_];
- };
+ if ( bad_pattern($test_csc_prefix_pattern) ) {
- # loop over all containers
- my @open_block_stack;
- my $iline = -1;
- my $KNEXT = 0;
- my $rOpts_variable_maximum_line_length =
- $rOpts->{'variable-maximum-line-length'};
- while ( defined($KNEXT) ) {
- my $KK = $KNEXT;
- $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
- my $rtoken_vars = $rLL->[$KK];
- my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- if ( !$type_sequence ) {
- next if ( $KK == 0 ); # first token in file may not be container
+ # shouldn't happen..must have screwed up escaping, above
+ report_definite_bug();
+ Warn(
+"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
+ );
- # an error here is most likely due to a recent programming change
- Fault("sequence = $type_sequence not defined at K=$KK");
+ # just warn and keep going with defaults
+ Warn("Please consider using a simpler -cscp prefix\n");
+ Warn("Using default -cscp instead; please check output\n");
}
-
- # We are just looking at code blocks
- my $token = $rtoken_vars->[_TOKEN_];
- my $type = $rtoken_vars->[_TYPE_];
- next unless ( $type eq $token );
- my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
- next unless ($block_type);
-
- # Keep a stack of all acceptable block braces seen.
- # Only consider blocks entirely on one line so dump the stack when line
- # changes.
- my $iline_last = $iline;
- $iline = $rLL->[$KK]->[_LINE_INDEX_];
- if ( $iline != $iline_last ) { @open_block_stack = () }
-
- if ( $token eq '}' ) {
- if (@open_block_stack) { pop @open_block_stack }
+ else {
+ $csc_prefix = $test_csc_prefix;
+ $csc_prefix_pattern = $test_csc_prefix_pattern;
}
- next unless ( $token eq '{' );
+ }
+ $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
+ $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
+ return;
+}
- # block must be balanced (bad scripts may be unbalanced)
- my $K_opening = $K_opening_container->{$type_sequence};
- my $K_closing = $K_closing_container->{$type_sequence};
- next unless ( defined($K_opening) && defined($K_closing) );
+##################################################
+# CODE SECTION 4: receive lines from the tokenizer
+##################################################
- # require that this block be entirely on one line
- next if ( $is_broken_block->($type_sequence) );
+sub write_line {
- # See if this block fits on one line of allowed length (which may
- # be different from the input script)
- $starting_lentot =
- $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- $starting_indent = 0;
- if ( !$rOpts_variable_maximum_line_length ) {
- my $level = $rLL->[$KK]->[_LEVEL_];
- $starting_indent = $rOpts_indent_columns * $level;
- }
-
- # Dump the stack if block is too long and skip this block
- if ( $excess_length_to_K->($K_closing) > 0 ) {
- @open_block_stack = ();
- next;
- }
-
- # OK, Block passes tests, remember it
- push @open_block_stack, $type_sequence;
-
- # We are only marking nested code blocks,
- # so check for a previous block on the stack
- next unless ( @open_block_stack > 1 );
+ # This routine originally received lines of code and immediately processed
+ # them. That was efficient when memory was limited, but now it just saves
+ # the lines it receives. They get processed all together after the last
+ # line is received.
- # Looks OK, mark this as a short nested block
- $rshort_nested->{$type_sequence} = 1;
+ # As tokenized lines are received they are converted to the format needed
+ # for the final formatting.
+ my ( $self, $line_of_tokens_old ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $rlines_new = $self->[_rlines_];
+ my $Kfirst;
+ my $line_of_tokens = {};
+ foreach my $key (
+ qw(
+ _curly_brace_depth
+ _ending_in_quote
+ _guessed_indentation_level
+ _line_number
+ _line_text
+ _line_type
+ _paren_depth
+ _quote_character
+ _square_bracket_depth
+ _starting_in_quote
+ )
+ )
+ {
+ $line_of_tokens->{$key} = $line_of_tokens_old->{$key};
}
- return;
-}
-sub weld_containers {
+ # Data needed by Logger
+ $line_of_tokens->{_level_0} = 0;
+ $line_of_tokens->{_ci_level_0} = 0;
+ $line_of_tokens->{_nesting_blocks_0} = "";
+ $line_of_tokens->{_nesting_tokens_0} = "";
- # Called once per file to do any welding operations requested by --weld*
- # flags.
- my ($self) = @_;
+ # Needed to avoid trimming quotes
+ $line_of_tokens->{_ended_in_blank_token} = undef;
- return if ( $rOpts->{'indent-only'} );
- return unless ( $rOpts->{'add-newlines'} );
+ my $line_type = $line_of_tokens_old->{_line_type};
+ my $input_line_no = $line_of_tokens_old->{_line_number} - 1;
+ if ( $line_type eq 'CODE' ) {
- if ( $rOpts->{'weld-nested-containers'} ) {
+ my $rtokens = $line_of_tokens_old->{_rtokens};
+ my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
+ my $rblock_type = $line_of_tokens_old->{_rblock_type};
+ my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
+ my $rcontainer_environment =
+ $line_of_tokens_old->{_rcontainer_environment};
+ my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
+ my $rlevels = $line_of_tokens_old->{_rlevels};
+ my $rslevels = $line_of_tokens_old->{_rslevels};
+ my $rci_levels = $line_of_tokens_old->{_rci_levels};
+ my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
+ my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
- # if called, weld_nested_containers must be called before other weld
- # operations. # This is because weld_nested_containers could overwrite
- # hash values written by weld_cuddled_blocks and weld_nested_quotes.
- $self->weld_nested_containers();
+ my $jmax = @{$rtokens} - 1;
+ if ( $jmax >= 0 ) {
+ $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
+ foreach my $j ( 0 .. $jmax ) {
- $self->weld_nested_quotes();
- }
+ # Clip negative nesting depths to zero to avoid problems.
+ # Negative values can occur in files with unbalanced containers
+ my $slevel = $rslevels->[$j];
+ if ( $slevel < 0 ) { $slevel = 0 }
- # Note that weld_nested_containers() changes the _LEVEL_ values, so
- # weld_cuddled_blocks must use the _TRUE_LEVEL_ values instead.
+ # But do not clip the 'level' variable yet. We will do this later,
+ # in sub 'store_token_to_go'. The reason is that in files with
+ # level errors, the logic in 'weld_cuddled_else' uses a stack
+ # logic that will give bad welds if we clip levels here.
+ ## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 }
- # Here is a good test case to Be sure that both cuddling and welding
- # are working and not interfering with each other: <<snippets/ce_wn1.in>>
+ my @tokary;
+ @tokary[
+ _TOKEN_, _TYPE_,
+ _BLOCK_TYPE_, _CONTAINER_TYPE_,
+ _CONTAINER_ENVIRONMENT_, _TYPE_SEQUENCE_,
+ _LEVEL_, _LEVEL_TRUE_,
+ _SLEVEL_, _CI_LEVEL_,
+ _LINE_INDEX_,
+ ]
+ = (
+ $rtokens->[$j], $rtoken_type->[$j],
+ $rblock_type->[$j], $rcontainer_type->[$j],
+ $rcontainer_environment->[$j], $rtype_sequence->[$j],
+ $rlevels->[$j], $rlevels->[$j],
+ $slevel, $rci_levels->[$j],
+ $input_line_no,
+ );
+ push @{$rLL}, \@tokary;
+ }
- # perltidy -wn -ce
+ $Klimit = @{$rLL} - 1;
- # if ($BOLD_MATH) { (
- # $labels, $comment,
- # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
- # ) } else { (
- # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
- # $after
- # ) }
+ # Need to remember if we can trim the input line
+ $line_of_tokens->{_ended_in_blank_token} =
+ $rtoken_type->[$jmax] eq 'b';
- $self->weld_cuddled_blocks();
+ $line_of_tokens->{_level_0} = $rlevels->[0];
+ $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
+ $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
+ $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
+ }
+ }
+ $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
+ $line_of_tokens->{_code_type} = "";
+ $self->[_Klimit_] = $Klimit;
+
+ push @{$rlines_new}, $line_of_tokens;
return;
}
-sub cumulative_length_before_K {
- my ( $self, $KK ) = @_;
- my $rLL = $self->[_rLL_];
- return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
-}
+#############################################
+# CODE SECTION 5: Pre-process the entire file
+#############################################
-sub cumulative_length_after_K {
- my ( $self, $KK ) = @_;
- my $rLL = $self->[_rLL_];
- return $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
-}
+sub finish_formatting {
-sub weld_cuddled_blocks {
- my ($self) = @_;
+ my ( $self, $severe_error ) = @_;
- # Called once per file to handle cuddled formatting
+ # The file has been tokenized and is ready to be formatted.
+ # All of the relevant data is stored in $self, ready to go.
- my $rweld_len_right_closing = $self->[_rweld_len_right_closing_];
+ # output file verbatim if severe error or no formatting requested
+ if ( $severe_error || $rOpts->{notidy} ) {
+ $self->dump_verbatim();
+ $self->wrapup();
+ return;
+ }
- # This routine implements the -cb flag by finding the appropriate
- # closing and opening block braces and welding them together.
- return unless ( %{$rcuddled_block_types} );
+ # Make a pass through the lines, looking at lines of CODE and identifying
+ # special processing needs, such format skipping sections marked by
+ # special comments
+ $self->scan_comments();
- my $rLL = $self->[_rLL_];
- return unless ( defined($rLL) && @{$rLL} );
- my $rbreak_container = $self->[_rbreak_container_];
+ # Find nested pairs of container tokens for any welding. This information
+ # is also needed for adding semicolons, so it is split apart from the
+ # welding step.
+ $self->find_nested_pairs();
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_closing_container = $self->[_K_closing_container_];
+ # Make sure everything looks good
+ $self->check_line_hashes();
- my $length_to_opening_seqno = sub {
- my ($seqno) = @_;
- my $KK = $K_opening_container->{$seqno};
- my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- return $lentot;
- };
- my $length_to_closing_seqno = sub {
- my ($seqno) = @_;
- my $KK = $K_closing_container->{$seqno};
- my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- return $lentot;
- };
+ # Future: Place to Begin future Iteration Loop
+ # foreach my $it_count(1..$maxit) {
- my $is_broken_block = sub {
+ # Future: We must reset some things after the first iteration.
+ # This includes:
+ # - resetting levels if there was any welding
+ # - resetting any phantom semicolons
+ # - dealing with any line numbering issues so we can relate final lines
+ # line numbers with input line numbers.
+ #
+ # If ($it_count>1) {
+ # Copy {level_raw} to [_LEVEL_] if ($it_count>1)
+ # Renumber lines
+ # }
- # a block is broken if the input line numbers of the braces differ
- # we can only cuddle between broken blocks
- my ($seqno) = @_;
- my $K_opening = $K_opening_container->{$seqno};
- return unless ( defined($K_opening) );
- my $K_closing = $K_closing_container->{$seqno};
- return unless ( defined($K_closing) );
- return $rbreak_container->{$seqno}
- || $rLL->[$K_closing]->[_LINE_INDEX_] !=
- $rLL->[$K_opening]->[_LINE_INDEX_];
- };
+ # Make a pass through all tokens, adding or deleting any whitespace as
+ # required. Also make any other changes, such as adding semicolons.
+ # All token changes must be made here so that the token data structure
+ # remains fixed for the rest of this iteration.
+ $self->respace_tokens();
- # A stack to remember open chains at all levels: This is a hash rather than
- # an array for safety because negative levels can occur in files with
- # errors. This allows us to keep processing with negative levels.
- # $in_chain{$level} = [$chain_type, $type_sequence];
- my %in_chain;
- my $CBO = $rOpts->{'cuddled-break-option'};
+ # Implement any welding needed for the -wn or -cb options
+ $self->weld_containers();
- # loop over structure items to find cuddled pairs
- my $level = 0;
- my $KNEXT = 0;
- while ( defined($KNEXT) ) {
- my $KK = $KNEXT;
- $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
- my $rtoken_vars = $rLL->[$KK];
- my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- if ( !$type_sequence ) {
- next if ( $KK == 0 ); # first token in file may not be container
- Fault("sequence = $type_sequence not defined at K=$KK");
- }
+ # Locate small nested blocks which should not be broken
+ $self->mark_short_nested_blocks();
- # We use the original levels because they get changed by sub
- # 'weld_nested_containers'. So if this were to be called before that
- # routine, the levels would be wrong and things would go bad.
- my $last_level = $level;
- $level = $rtoken_vars->[_LEVEL_TRUE_];
+ $self->adjust_indentation_levels();
- if ( $level < $last_level ) { $in_chain{$last_level} = undef }
- elsif ( $level > $last_level ) { $in_chain{$level} = undef }
+ # Finishes formatting and write the result to the line sink.
+ # Eventually this call should just change the 'rlines' data according to the
+ # new line breaks and then return so that we can do an internal iteration
+ # before continuing with the next stages of formatting.
+ $self->process_all_lines();
- # We are only looking at code blocks
- my $token = $rtoken_vars->[_TOKEN_];
- my $type = $rtoken_vars->[_TYPE_];
- next unless ( $type eq $token );
+ ############################################################
+ # A possible future decomposition of 'process_all_lines()' follows.
+ # Benefits:
+ # - allow perltidy to do an internal iteration which eliminates
+ # many unnecessary steps, such as re-parsing and vertical alignment.
+ # This will allow iterations to be automatic.
+ # - consolidate all length calculations to allow utf8 alignment
+ ############################################################
- if ( $token eq '{' ) {
+ # Future: Check for convergence of beginning tokens on CODE lines
- my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
- if ( !$block_type ) {
+ # Future: End of Iteration Loop
- # patch for unrecognized block types which may not be labeled
- my $Kp = $self->K_previous_nonblank($KK);
- while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
- $Kp = $self->K_previous_nonblank($Kp);
- }
- next unless $Kp;
- $block_type = $rLL->[$Kp]->[_TOKEN_];
+ # Future: add_padding($rargs);
- }
- if ( $in_chain{$level} ) {
+ # Future: add_closing_side_comments($rargs);
- # we are in a chain and are at an opening block brace.
- # See if we are welding this opening brace with the previous
- # block brace. Get their identification numbers:
- my $closing_seqno = $in_chain{$level}->[1];
- my $opening_seqno = $type_sequence;
+ # Future: vertical_alignment($rargs);
- # The preceding block must be on multiple lines so that its
- # closing brace will start a new line.
- if ( !$is_broken_block->($closing_seqno) ) {
- next unless ( $CBO == 2 );
- $rbreak_container->{$closing_seqno} = 1;
- }
+ # Future: output results
- # we will let the trailing block be either broken or intact
- ## && $is_broken_block->($opening_seqno);
+ # A final routine to tie up any loose ends
+ $self->wrapup();
+ return;
+}
- # We can weld the closing brace to its following word ..
- my $Ko = $K_closing_container->{$closing_seqno};
- my $Kon;
- if ( defined($Ko) ) {
- $Kon = $self->K_next_nonblank($Ko);
- }
+sub dump_verbatim {
+ my $self = shift;
+ my $rlines = $self->[_rlines_];
+ foreach my $line ( @{$rlines} ) {
+ my $input_line = $line->{_line_text};
+ $self->write_unindented_line($input_line);
+ }
+ return;
+}
- # ..unless it is a comment
- if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) {
- my $dlen =
- $rLL->[$Kon]->[_CUMULATIVE_LENGTH_] -
- $rLL->[ $Ko - 1 ]->[_CUMULATIVE_LENGTH_];
- $rweld_len_right_closing->{$closing_seqno} = $dlen;
+###########################################
+# CODE SECTION 11: Code to break long lists
+###########################################
- # Set flag that we want to break the next container
- # so that the cuddled line is balanced.
- $rbreak_container->{$opening_seqno} = 1
- if ($CBO);
- }
+{ ## begin closure scan_comments
- }
- else {
+ # This routine is called once per file at the start of processing to
+ # make a pass through the lines, looking at lines of CODE and identifying
+ # special processing needs, such format skipping sections marked by
+ # special comments.
- # We are not in a chain. Start a new chain if we see the
- # starting block type.
- if ( $rcuddled_block_types->{$block_type} ) {
- $in_chain{$level} = [ $block_type, $type_sequence ];
- }
- else {
- $block_type = '*';
- $in_chain{$level} = [ $block_type, $type_sequence ];
- }
- }
- }
- elsif ( $token eq '}' ) {
- if ( $in_chain{$level} ) {
+ my $Last_line_had_side_comment;
+ my $In_format_skipping_section;
+ my $Saw_VERSION_in_this_file;
- # We are in a chain at a closing brace. See if this chain
- # continues..
- my $Knn = $self->K_next_code($KK);
- next unless $Knn;
+ sub scan_comments {
+ my $self = shift;
+ my $rlines = $self->[_rlines_];
- my $chain_type = $in_chain{$level}->[0];
- my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
- if (
- $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
- )
- {
+ $Last_line_had_side_comment = undef;
+ $In_format_skipping_section = undef;
+ $Saw_VERSION_in_this_file = undef;
- # Note that we do not weld yet because we must wait until
- # we we are sure that an opening brace for this follows.
- $in_chain{$level}->[1] = $type_sequence;
- }
- else { $in_chain{$level} = undef }
- }
+ # Loop over all lines
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ my $line_type = $line_of_tokens->{_line_type};
+ next unless ( $line_type eq 'CODE' );
+ my $CODE_type = $self->get_CODE_type($line_of_tokens);
+ $line_of_tokens->{_code_type} = $CODE_type;
}
+ return;
}
- return;
-}
-
-sub weld_nested_containers {
- my ($self) = @_;
- # Called once per file for option '--weld-nested-containers'
+ sub get_CODE_type {
+ my ( $self, $line_of_tokens ) = @_;
- my $rweld_len_left_closing = $self->[_rweld_len_left_closing_];
- my $rweld_len_left_opening = $self->[_rweld_len_left_opening_];
- my $rweld_len_right_closing = $self->[_rweld_len_right_closing_];
- my $rweld_len_right_opening = $self->[_rweld_len_right_opening_];
+ # We are looking at a line of code and setting a flag to
+ # describe any special processing that it requires
- # This routine implements the -wn flag by "welding together"
- # the nested closing and opening tokens which were previously
- # identified by sub 'find_nested_pairs'. "welding" simply
- # involves setting certain hash values which will be checked
- # later during formatting.
+ # Possible CODE_types are as follows.
+ # 'BL' = Blank Line
+ # 'VB' = Verbatim - line goes out verbatim
+ # 'IO' = Indent Only - line goes out unchanged except for indentation
+ # 'NIN' = No Internal Newlines - line does not get broken
+ # 'HSC'=Hanging Side Comment - fix this hanging side comment
+ # 'BC'=Block Comment - an ordinary full line comment
+ # 'SBC'=Static Block Comment - a block comment which does not get
+ # indented
+ # 'SBCX'=Static Block Comment Without Leading Space
+ # 'VER'=VERSION statement
+ # '' or (undefined) - no restructions
- my $rLL = $self->[_rLL_];
- my $Klimit = $self->get_rLL_max_index();
- my $rnested_pairs = $self->[_rnested_pairs_];
- my $rlines = $self->[_rlines_];
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_closing_container = $self->[_K_closing_container_];
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
- # Return unless there are nested pairs to weld
- return unless defined($rnested_pairs) && @{$rnested_pairs};
+ my $rOpts_add_newlines = $rOpts->{'add-newlines'};
+ my $rOpts_format_skipping = $rOpts->{'format-skipping'};
- # This array will hold the sequence numbers of the tokens to be welded.
- my @welds;
+ my $CODE_type = $rOpts->{'indent-only'} ? 'IO' : "";
+ my $no_internal_newlines = 1 - $rOpts_add_newlines;
+ if ( !$CODE_type && $no_internal_newlines ) { $CODE_type = 'NIN' }
- # Variables needed for estimating line lengths
- my $starting_indent;
- my $starting_lentot;
+ # extract what we need for this line..
- # A tolerance to the length for length estimates. In some rare cases
- # this can avoid problems where a final weld slightly exceeds the
- # line length and gets broken in a bad spot.
- my $length_tol = 1;
+ my $input_line_number = $line_of_tokens->{_line_number};
- my $excess_length_to_K = sub {
- my ($K) = @_;
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ my $jmax = -1;
+ if ( defined($Kfirst) ) { $jmax = $Klast - $Kfirst }
+ my $input_line = $line_of_tokens->{_line_text};
- # Estimate the length from the line start to a given token
- my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
- my $excess_length =
- $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
- return ($excess_length);
- };
+ my $is_static_block_comment = 0;
- my $length_to_opening_seqno = sub {
- my ($seqno) = @_;
- my $KK = $K_opening_container->{$seqno};
- my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- return $lentot;
- };
+ # Handle a continued quote..
+ if ( $line_of_tokens->{_starting_in_quote} ) {
- my $length_to_closing_seqno = sub {
- my ($seqno) = @_;
- my $KK = $K_closing_container->{$seqno};
- my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- return $lentot;
- };
+ # A line which is entirely a quote or pattern must go out
+ # verbatim. Note: the \n is contained in $input_line.
+ if ( $jmax <= 0 ) {
+ if ( ( $input_line =~ "\t" ) ) {
+ $self->note_embedded_tab($input_line_number);
+ }
+ $Last_line_had_side_comment = 0;
+ return 'VB';
+ }
+ }
- # Abbreviations:
- # _oo=outer opening, i.e. first of { {
- # _io=inner opening, i.e. second of { {
- # _oc=outer closing, i.e. second of } {
- # _ic=inner closing, i.e. first of } }
+ my $is_block_comment =
+ ( $jmax == 0 && $rLL->[$Kfirst]->[_TYPE_] eq '#' );
- my $previous_pair;
+ # Write line verbatim if we are in a formatting skip section
+ if ($In_format_skipping_section) {
+ $Last_line_had_side_comment = 0;
- # We are working from outermost to innermost pairs so that
- # level changes will be complete when we arrive at the inner pairs.
+ # Note: extra space appended to comment simplifies pattern matching
+ if ( $is_block_comment
+ && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
+ /$format_skipping_pattern_end/ )
+ {
+ $In_format_skipping_section = 0;
+ write_logfile_entry("Exiting formatting skip section\n");
+ }
+ return 'FS';
+ }
- while ( my $item = pop( @{$rnested_pairs} ) ) {
- my ( $inner_seqno, $outer_seqno ) = @{$item};
-
- my $Kouter_opening = $K_opening_container->{$outer_seqno};
- my $Kinner_opening = $K_opening_container->{$inner_seqno};
- my $Kouter_closing = $K_closing_container->{$outer_seqno};
- my $Kinner_closing = $K_closing_container->{$inner_seqno};
-
- my $outer_opening = $rLL->[$Kouter_opening];
- my $inner_opening = $rLL->[$Kinner_opening];
- my $outer_closing = $rLL->[$Kouter_closing];
- my $inner_closing = $rLL->[$Kinner_closing];
-
- my $iline_oo = $outer_opening->[_LINE_INDEX_];
- my $iline_io = $inner_opening->[_LINE_INDEX_];
- my $iline_ic = $inner_closing->[_LINE_INDEX_];
-
- # Set flag saying if this pair starts a new weld
- my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
-
- # Set flag saying if this pair is adjacent to the previous nesting pair
- # (even if previous pair was rejected as a weld)
- my $touch_previous_pair =
- defined($previous_pair) && $outer_seqno == $previous_pair->[0];
- $previous_pair = $item;
+ # See if we are entering a formatting skip section
+ if ( $rOpts_format_skipping
+ && $is_block_comment
+ && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
+ /$format_skipping_pattern_begin/ )
+ {
+ $In_format_skipping_section = 1;
+ write_logfile_entry("Entering formatting skip section\n");
+ $Last_line_had_side_comment = 0;
+ return 'FS';
+ }
- # Set a flag if we should not weld. It sometimes looks best not to weld
- # when the opening and closing tokens are very close. However, there
- # is a danger that we will create a "blinker", which oscillates between
- # two semi-stable states, if we do not weld. So the rules for
- # not welding have to be carefully defined and tested.
- my $do_not_weld;
- if ( !$touch_previous_pair ) {
+ # ignore trailing blank tokens (they will get deleted later)
+ if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
+ $jmax--;
+ }
- # If this pair is not adjacent to the previous pair (skipped or
- # not), then measure lengths from the start of line of oo
+ # Handle a blank line..
+ if ( $jmax < 0 ) {
+ $Last_line_had_side_comment = 0;
+ return 'BL';
+ }
- my $rK_range = $rlines->[$iline_oo]->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
- $starting_lentot =
- $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
- $starting_indent = 0;
- if ( !$rOpts_variable_maximum_line_length ) {
- my $level = $rLL->[$Kfirst]->[_LEVEL_];
- $starting_indent = $rOpts_indent_columns * $level;
- }
+ # see if this is a static block comment (starts with ## by default)
+ my $is_static_block_comment_without_leading_space = 0;
+ if ( $is_block_comment
+ && $rOpts->{'static-block-comments'}
+ && $input_line =~ /$static_block_comment_pattern/ )
+ {
+ $is_static_block_comment = 1;
+ $is_static_block_comment_without_leading_space =
+ substr( $input_line, 0, 1 ) eq '#';
+ }
- # DO-NOT-WELD RULE 1:
- # Do not weld something that looks like the start of a two-line
- # function call, like this: <<snippets/wn6.in>>
- # $trans->add_transformation(
- # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
- # We will look for a semicolon after the closing paren.
+ # Check for comments which are line directives
+ # Treat exactly as static block comments without leading space
+ # reference: perlsyn, near end, section Plain Old Comments (Not!)
+ # example: '# line 42 "new_filename.plx"'
+ if (
+ $is_block_comment
+ && $input_line =~ /^\# \s*
+ line \s+ (\d+) \s*
+ (?:\s("?)([^"]+)\2)? \s*
+ $/x
+ )
+ {
+ $is_static_block_comment = 1;
+ $is_static_block_comment_without_leading_space = 1;
+ }
- # We want to weld something complex, like this though
- # my $compass = uc( opposite_direction( line_to_canvas_direction(
- # @{ $coords[0] }, @{ $coords[1] } ) ) );
- # Otherwise we will get a 'blinker'. For example, the following
- # would become a blinker without this rule:
- # $Self->_Add( $SortOrderDisplay{ $Field
- # ->GenerateFieldForSelectSQL() } );
- # But it is okay to weld a two-line statement if it looks like
- # it was already welded, meaning that the two opening containers are
- # on a different line that the two closing containers. This is
- # necessary to prevent blinking of something like this with
- # perltidy -wn -pbp (starting indentation two levels deep):
+ # look for hanging side comment
+ if (
+ $is_block_comment
+ && $Last_line_had_side_comment # last line had side comment
+ && $input_line =~ /^\s/ # there is some leading space
+ && !$is_static_block_comment # do not make static comment hanging
+ && $rOpts->{'hanging-side-comments'} # user is allowing
+ # hanging side comments
+ # like this
+ )
+ {
+ $Last_line_had_side_comment = 1;
+ return 'HSC';
+ }
- # $top_label->set_text( gettext(
- # "Unable to create personal directory - check permissions.") );
+ # remember if this line has a side comment
+ $Last_line_had_side_comment =
+ ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq '#' );
- my $iline_oc = $outer_closing->[_LINE_INDEX_];
- my $token_oo = $outer_opening->[_TOKEN_];
- if ( $iline_oc == $iline_oo + 1
- && $iline_io == $iline_ic
- && $token_oo eq '(' )
- {
+ # Handle a block (full-line) comment..
+ if ($is_block_comment) {
- # Look for following semicolon...
- my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
- my $next_nonblank_type =
- defined($Knext_nonblank)
- ? $rLL->[$Knext_nonblank]->[_TYPE_]
- : 'b';
- if ( $next_nonblank_type eq ';' ) {
+ # TRIM COMMENTS -- This could be turned off as a option
+ $rLL->[$Kfirst]->[_TOKEN_] =~ s/\s*$//; # trim right end
- # Then do not weld if no other containers between inner
- # opening and closing.
- my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
- if ( $Knext_seq_item == $Kinner_closing ) {
- $do_not_weld ||= 1;
- }
- }
+ if ($is_static_block_comment_without_leading_space) {
+ return 'SBCX';
+ }
+ elsif ($is_static_block_comment) {
+ return 'SBC';
+ }
+ else {
+ return 'BC';
}
}
- # DO-NOT-WELD RULE 2:
- # Do not weld an opening paren to an inner one line brace block
- # We will just use old line numbers for this test and require
- # iterations if necessary for convergence
-
- # For example, otherwise we could cause the opening paren
- # in the following example to separate from the caller name
- # as here:
-
- # $_[0]->code_handler
- # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
-
- # Here is another example where we do not want to weld:
- # $wrapped->add_around_modifier(
- # sub { push @tracelog => 'around 1'; $_[0]->(); } );
+ # Patch needed for MakeMaker. Do not break a statement
+ # in which $VERSION may be calculated. See MakeMaker.pm;
+ # this is based on the coding in it.
+ # The first line of a file that matches this will be eval'd:
+ # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
+ # Examples:
+ # *VERSION = \'1.01';
+ # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
+ # We will pass such a line straight through without breaking
+ # it unless -npvl is used.
- # If the one line sub block gets broken due to length or by the
- # user, then we can weld. The result will then be:
- # $wrapped->add_around_modifier( sub {
- # push @tracelog => 'around 1';
- # $_[0]->();
- # } );
+ # Patch for problem reported in RT #81866, where files
+ # had been flattened into a single line and couldn't be
+ # tidied without -npvl. There are two parts to this patch:
+ # First, it is not done for a really long line (80 tokens for now).
+ # Second, we will only allow up to one semicolon
+ # before the VERSION. We need to allow at least one semicolon
+ # for statements like this:
+ # require Exporter; our $VERSION = $Exporter::VERSION;
+ # where both statements must be on a single line for MakeMaker
- if ( $iline_ic == $iline_io ) {
+ my $is_VERSION_statement = 0;
+ if ( !$Saw_VERSION_in_this_file
+ && $jmax < 80
+ && $input_line =~
+ /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
+ {
+ $Saw_VERSION_in_this_file = 1;
+ write_logfile_entry("passing VERSION line; -npvl deactivates\n");
- my $token_oo = $outer_opening->[_TOKEN_];
- $do_not_weld ||= $token_oo eq '(';
+ # This code type has lower priority than others
+ $CODE_type = 'VER' unless ($CODE_type);
}
+ return $CODE_type;
+ }
+} ## end closure scan_comments
- # DO-NOT-WELD RULE 3:
- # Do not weld if this makes our line too long
- $do_not_weld ||= $excess_length_to_K->($Kinner_opening) > 0;
+sub find_nested_pairs {
+ my $self = shift;
- # DO-NOT-WELD RULE 4; implemented for git#10:
- # Do not weld an opening -ce brace if the next container is on a single
- # line, different from the opening brace. (This is very rare). For
- # example, given the following with -ce, we will avoid joining the {
- # and [
+ # This routine is called once per file to do preliminary work needed for
+ # the --weld-nested option. This information is also needed for adding
+ # semicolons.
- # } else {
- # [ $_, length($_) ]
- # }
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
- # because this would produce a terminal one-line block:
+ # We define an array of pairs of nested containers
+ my @nested_pairs;
- # } else { [ $_, length($_) ] }
+ # We also set the following hash values to identify container pairs for
+ # which the opening and closing tokens are adjacent in the token stream:
+ # $rpaired_to_inner_container->{$seqno_out}=$seqno_in where $seqno_out and
+ # $seqno_in are the seqence numbers of the outer and inner containers of
+ # the pair We need these later to decide if we can insert a missing
+ # semicolon
+ my $rpaired_to_inner_container = {};
- # which may not be what is desired. But given this input:
+ # This local hash remembers if an outer container has a close following
+ # inner container;
+ # The key is the outer sequence number
+ # The value is the token_hash of the inner container
- # } else { [ $_, length($_) ] }
+ my %has_close_following_opening;
- # then we will do the weld and retain the one-line block
- if ( $rOpts->{'cuddled-else'} ) {
- my $block_type = $rLL->[$Kouter_opening]->[_BLOCK_TYPE_];
- if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
- my $io_line = $inner_opening->[_LINE_INDEX_];
- my $ic_line = $inner_closing->[_LINE_INDEX_];
- my $oo_line = $outer_opening->[_LINE_INDEX_];
- $do_not_weld ||=
- ( $oo_line < $io_line && $ic_line == $io_line );
- }
- }
+ # Names of calling routines can either be marked as 'i' or 'w',
+ # and they may invoke a sub call with an '->'. We will consider
+ # any consecutive string of such types as a single unit when making
+ # weld decisions. We also allow a leading !
+ my $is_name_type = {
+ 'i' => 1,
+ 'w' => 1,
+ 'U' => 1,
+ '->' => 1,
+ '!' => 1,
+ };
- if ($do_not_weld) {
+ my $is_name = sub {
+ my $type = shift;
+ return $type && $is_name_type->{$type};
+ };
- # After neglecting a pair, we start measuring from start of point io
- $starting_lentot =
- $self->cumulative_length_before_K($Kinner_opening);
- $starting_indent = 0;
- if ( !$rOpts_variable_maximum_line_length ) {
- my $level = $inner_opening->[_LEVEL_];
- $starting_indent = $rOpts_indent_columns * $level;
- }
+ my $last_container;
+ my $last_last_container;
+ my $last_nonblank_token_vars;
+ my $last_count;
- # Normally, a broken pair should not decrease indentation of
- # intermediate tokens:
- ## if ( $last_pair_broken ) { next }
- # However, for long strings of welded tokens, such as '{{{{{{...'
- # we will allow broken pairs to also remove indentation.
- # This will keep very long strings of opening and closing
- # braces from marching off to the right. We will do this if the
- # number of tokens in a weld before the broken weld is 4 or more.
- # This rule will mainly be needed for test scripts, since typical
- # welds have fewer than about 4 welded tokens.
- if ( !@welds || @{ $welds[-1] } < 4 ) { next }
- }
+ my $nonblank_token_count = 0;
- # otherwise start new weld ...
- elsif ($starting_new_weld) {
- push @welds, $item;
- }
+ # loop over all tokens
+ foreach my $rtoken_vars ( @{$rLL} ) {
- # ... or extend current weld
- else {
- unshift @{ $welds[-1] }, $inner_seqno;
- }
+ my $type = $rtoken_vars->[_TYPE_];
- # After welding, reduce the indentation level if all intermediate tokens
- my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
- if ( $dlevel != 0 ) {
- my $Kstart = $Kinner_opening;
- my $Kstop = $Kinner_closing;
- for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) {
- $rLL->[$KK]->[_LEVEL_] += $dlevel;
- }
- }
- }
+ next if ( $type eq 'b' );
- # Define weld lengths needed later to set line breaks
- foreach my $item (@welds) {
+ # long identifier-like items are counted as a single item
+ $nonblank_token_count++
+ unless ( $is_name->($type)
+ && $is_name->( $last_nonblank_token_vars->[_TYPE_] ) );
- # sweep from inner to outer
+ my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ if ($type_sequence) {
- my $inner_seqno;
- my $len_close = 0;
- my $len_open = 0;
- foreach my $outer_seqno ( @{$item} ) {
- if ($inner_seqno) {
+ my $token = $rtoken_vars->[_TOKEN_];
- my $dlen_opening =
- $length_to_opening_seqno->($inner_seqno) -
- $length_to_opening_seqno->($outer_seqno);
+ if ( $is_opening_token{$token} ) {
- my $dlen_closing =
- $length_to_closing_seqno->($outer_seqno) -
- $length_to_closing_seqno->($inner_seqno);
+ # following previous opening token ...
+ if ( $last_container
+ && $is_opening_token{ $last_container->[_TOKEN_] } )
+ {
- $len_open += $dlen_opening;
- $len_close += $dlen_closing;
+ # adjacent to this one
+ my $tok_diff = $nonblank_token_count - $last_count;
+ my $last_tok = $last_nonblank_token_vars->[_TOKEN_];
+
+ if ( $tok_diff == 1
+ || $tok_diff == 2 && $last_container->[_TOKEN_] eq '(' )
+ {
+
+ # remember this pair...
+ my $outer_seqno = $last_container->[_TYPE_SEQUENCE_];
+ my $inner_seqno = $type_sequence;
+ $has_close_following_opening{$outer_seqno} =
+ $rtoken_vars;
+ }
+ }
}
- $rweld_len_left_closing->{$outer_seqno} = $len_close;
- $rweld_len_right_opening->{$outer_seqno} = $len_open;
+ elsif ( $is_closing_token{$token} ) {
- $inner_seqno = $outer_seqno;
- }
+ # if the corresponding opening token had an adjacent opening
+ if ( $has_close_following_opening{$type_sequence}
+ && $is_closing_token{ $last_container->[_TOKEN_] }
+ && $has_close_following_opening{$type_sequence}
+ ->[_TYPE_SEQUENCE_] == $last_container->[_TYPE_SEQUENCE_] )
+ {
- # sweep from outer to inner
- foreach my $seqno ( reverse @{$item} ) {
- $rweld_len_right_closing->{$seqno} =
- $len_close - $rweld_len_left_closing->{$seqno};
- $rweld_len_left_opening->{$seqno} =
- $len_open - $rweld_len_right_opening->{$seqno};
- }
- }
+ # The closing weld tokens must be adjacent
+ # NOTE: so intermediate commas and semicolons
+ # can currently block a weld. This is something
+ # that could be fixed in the future by including
+ # a flag to delete un-necessary commas and semicolons.
+ my $tok_diff = $nonblank_token_count - $last_count;
- #####################################
- # DEBUG
- #####################################
- if (0) {
- my $count = 0;
- local $" = ')(';
- foreach my $weld (@welds) {
- print "\nWeld number $count has seq: (@{$weld})\n";
- foreach my $seq ( @{$weld} ) {
- print <<EOM;
- seq=$seq
- left_opening=$rweld_len_left_opening->{$seq};
- right_opening=$rweld_len_right_opening->{$seq};
- left_closing=$rweld_len_left_closing->{$seq};
- right_closing=$rweld_len_right_closing->{$seq};
-EOM
+ if ( $tok_diff == 1 ) {
+
+ # This is a closely nested pair ..
+ my $inner_seqno = $last_container->[_TYPE_SEQUENCE_];
+ my $outer_seqno = $type_sequence;
+ $rpaired_to_inner_container->{$outer_seqno} =
+ $inner_seqno;
+
+ push @nested_pairs, [ $inner_seqno, $outer_seqno ];
+ }
+ }
}
- $count++;
+ $last_last_container = $last_container;
+ $last_container = $rtoken_vars;
+ $last_count = $nonblank_token_count;
}
+ $last_nonblank_token_vars = $rtoken_vars;
}
+ $self->[_rnested_pairs_] = \@nested_pairs;
+ $self->[_rpaired_to_inner_container_] = $rpaired_to_inner_container;
return;
}
-sub weld_nested_quotes {
+{ ## begin closure check_line_hashes
- # Called once per file for option '--weld-nested-containers'. This
- # does welding on qw quotes.
+ # This code checks that no autovivification occurs in the 'line' hash
- my $self = shift;
+ my %valid_line_hash;
- my $rweld_len_left_closing = $self->[_rweld_len_left_closing_];
- my $rweld_len_right_opening = $self->[_rweld_len_right_opening_];
+ BEGIN {
- my $rLL = $self->[_rLL_];
- return unless ( defined($rLL) && @{$rLL} );
+ # These keys are defined for each line in the formatter
+ # Each line must have exactly these quantities
+ my @valid_line_keys = qw(
+ _curly_brace_depth
+ _ending_in_quote
+ _guessed_indentation_level
+ _line_number
+ _line_text
+ _line_type
+ _paren_depth
+ _quote_character
+ _rK_range
+ _square_bracket_depth
+ _starting_in_quote
+ _ended_in_blank_token
+ _code_type
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_closing_container = $self->[_K_closing_container_];
- my $rlines = $self->[_rlines_];
+ _ci_level_0
+ _level_0
+ _nesting_blocks_0
+ _nesting_tokens_0
+ );
- my $rOpts_variable_maximum_line_length =
- $rOpts->{'variable-maximum-line-length'};
+ @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
+ }
- my $is_single_quote = sub {
- my ( $Kbeg, $Kend, $quote_type ) = @_;
- foreach my $K ( $Kbeg .. $Kend ) {
- my $test_type = $rLL->[$K]->[_TYPE_];
- next if ( $test_type eq 'b' );
- return if ( $test_type ne $quote_type );
+ sub check_line_hashes {
+ my $self = shift;
+ my $rlines = $self->[_rlines_];
+ foreach my $rline ( @{$rlines} ) {
+ my $iline = $rline->{_line_number};
+ my $line_type = $rline->{_line_type};
+ check_keys( $rline, \%valid_line_hash,
+ "Checkpoint: line number =$iline, line_type=$line_type", 1 );
}
- return 1;
- };
+ return;
+ }
+} ## end closure check_line_hashes
- my $excess_line_length_K = sub {
- my ( $KK, $Ktest ) = @_;
+sub respace_tokens {
- # what is the excess length if we add token $Ktest to the line with $KK?
- my $iline = $rLL->[$KK]->[_LINE_INDEX_];
- my $rK_range = $rlines->[$iline]->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
- my $starting_lentot =
- $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
- my $starting_indent = 0;
- my $length_tol = 1;
- if ( !$rOpts_variable_maximum_line_length ) {
- my $level = $rLL->[$Kfirst]->[_LEVEL_];
- $starting_indent = $rOpts_indent_columns * $level;
- }
+ my $self = shift;
+ return if $rOpts->{'indent-only'};
- my $length = $rLL->[$Ktest]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
- my $excess_length =
- $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
- return $excess_length;
- };
+ # This routine is called once per file to do as much formatting as possible
+ # before new line breaks are set.
- # look for single qw quotes nested in containers
- my $KNEXT = 0;
- while ( defined($KNEXT) ) {
- my $KK = $KNEXT;
- $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
- my $rtoken_vars = $rLL->[$KK];
- my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
- if ( !$outer_seqno ) {
- next if ( $KK == 0 ); # first token in file may not be container
- Fault("sequence = $outer_seqno not defined at K=$KK");
- }
+ # This routine makes all necessary and possible changes to the tokenization
+ # after the initial tokenization of the file. This is a tedious routine,
+ # but basically it consists of inserting and deleting whitespace between
+ # nonblank tokens according to the selected parameters. In a few cases
+ # non-space characters are added, deleted or modified.
- my $token = $rtoken_vars->[_TOKEN_];
- if ( $is_opening_token{$token} ) {
+ # The goal of this routine is to create a new token array which only needs
+ # the definition of new line breaks and padding to complete formatting. In
+ # a few cases we have to cheat a little to achieve this goal. In
+ # particular, we may not know if a semicolon will be needed, because it
+ # depends on how the line breaks go. To handle this, we include the
+ # semicolon as a 'phantom' which can be displayed as normal or as an empty
+ # string.
- # see if the next token is a quote of some type
- my $Kn = $self->K_next_nonblank($KK);
- next unless $Kn;
- my $next_token = $rLL->[$Kn]->[_TOKEN_];
- my $next_type = $rLL->[$Kn]->[_TYPE_];
- next
- unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
- && $next_token =~ /^q/ );
+ # Method: The old tokens are copied one-by-one, with changes, from the old
+ # linear storage array $rLL to a new array $rLL_new.
- # The token before the closing container must also be a quote
- my $K_closing = $K_closing_container->{$outer_seqno};
- my $Kt_end = $self->K_previous_nonblank($K_closing);
- next unless $rLL->[$Kt_end]->[_TYPE_] eq $next_type;
+ my $rLL = $self->[_rLL_];
+ my $Klimit_old = $self->[_Klimit_];
+ my $rlines = $self->[_rlines_];
+ my $rpaired_to_inner_container = $self->[_rpaired_to_inner_container_];
+ my $length_function = $self->[_length_function_];
- # Do not weld to single-line quotes. Nothing is gained, and it may
- # look bad.
- next if ( $Kt_end == $Kn );
+ my $rLL_new = []; # This is the new array
+ my $KK = 0;
+ my $rtoken_vars;
+ my $Kmax = @{$rLL} - 1;
- # Only weld to quotes delimited with container tokens. This is
- # because welding to arbitrary quote delimiters can produce code
- # which is less readable than without welding.
- my $closing_delimiter = substr( $rLL->[$Kt_end]->[_TOKEN_], -1, 1 );
- next
- unless ( $is_closing_token{$closing_delimiter}
- || $closing_delimiter eq '>' );
+ my $CODE_type = "";
+ my $line_type = "";
- # Now make sure that there is just a single quote in the container
- next
- unless ( $is_single_quote->( $Kn + 1, $Kt_end - 1, $next_type ) );
+ my $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
+ my $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
+ my $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
+ my $rOpts_ignore_side_comment_lengths =
+ $rOpts->{'ignore-side-comment-lengths'};
- # If welded, the line must not exceed allowed line length
- # Assume old line breaks for this estimate.
- next if ( $excess_line_length_K->( $KK, $Kn ) > 0 );
+ # Set the whitespace flags, which indicate the token spacing preference.
+ my $rwhitespace_flags = $self->set_whitespace_flags();
- # OK to weld
- # FIXME: Are these always correct?
- $rweld_len_left_closing->{$outer_seqno} = 1;
- $rweld_len_right_opening->{$outer_seqno} = 2;
+ # we will be setting token lengths as we go
+ my $cumulative_length = 0;
- # QW PATCH 1 (Testing)
- # undo CI for welded quotes
- foreach my $K ( $Kn .. $Kt_end ) {
- $rLL->[$K]->[_CI_LEVEL_] = 0;
- }
+ # We also define these hash indexes giving container token array indexes
+ # as a function of the container sequence numbers. For example,
+ my $K_opening_container = {}; # opening [ { or (
+ my $K_closing_container = {}; # closing ] } or )
+ my $K_opening_ternary = {}; # opening ? of ternary
+ my $K_closing_ternary = {}; # closing : of ternary
- # Change the level of a closing qw token to be that of the outer
- # containing token. This will allow -lp indentation to function
- # correctly in the vertical aligner.
- $rLL->[$Kt_end]->[_LEVEL_] = $rLL->[$K_closing]->[_LEVEL_];
+ # List of new K indexes of phantom semicolons
+ # This will be needed if we want to undo them for iterations
+ my $rK_phantom_semicolons = [];
+
+ my %seqno_stack;
+ my %KK_stack;
+ my $depth_next = 0;
+ my $rtype_count_by_seqno = {};
+ my $ris_broken_container = {};
+ my $rhas_broken_container = {};
+ my $rparent_of_seqno = {};
+ my $rchildren_of_seqno = {};
+
+ # a sub to link preceding nodes forward to a new node type
+ my $link_back = sub {
+ my ( $Ktop, $key ) = @_;
+
+ my $Kprev = $Ktop - 1;
+ while ( $Kprev >= 0
+ && !defined( $rLL_new->[$Kprev]->[$key] ) )
+ {
+ $rLL_new->[$Kprev]->[$key] = $Ktop;
+ $Kprev -= 1;
}
- }
- return;
-}
+ };
-sub weld_len_left {
+ # A sub to store one token in the new array
+ # All new tokens must be stored by this sub so that it can update
+ # all data structures on the fly.
+ my $last_nonblank_type = ';';
+ my $last_nonblank_token = ';';
+ my $last_nonblank_block_type = '';
+ my $nonblank_token_count = 0;
+ my $store_token = sub {
+ my ($item) = @_;
- my ( $self, $seqno, $type_or_tok ) = @_;
+ # This will be the index of this item in the new array
+ my $KK_new = @{$rLL_new};
- my $rweld_len_left_closing = $self->[_rweld_len_left_closing_];
- my $rweld_len_left_opening = $self->[_rweld_len_left_opening_];
+ # check for a sequenced item (i.e., container or ?/:)
+ my $type_sequence = $item->[_TYPE_SEQUENCE_];
+ if ($type_sequence) {
- # Given the sequence number of a token, and the token or its type,
- # return the length of any weld to its left
+ $link_back->( $KK_new, _KNEXT_SEQ_ITEM_ );
- my $weld_len;
- if ($seqno) {
- if ( $is_closing_type{$type_or_tok} ) {
- $weld_len = $rweld_len_left_closing->{$seqno};
- }
- elsif ( $is_opening_type{$type_or_tok} ) {
- $weld_len = $rweld_len_left_opening->{$seqno};
- }
- }
- if ( !defined($weld_len) ) { $weld_len = 0 }
- return $weld_len;
-}
+ my $token = $item->[_TOKEN_];
+ if ( $is_opening_token{$token} ) {
-sub weld_len_right {
+ $K_opening_container->{$type_sequence} = $KK_new;
+ }
+ elsif ( $is_closing_token{$token} ) {
- my ( $self, $seqno, $type_or_tok ) = @_;
+ $K_closing_container->{$type_sequence} = $KK_new;
+ }
- my $rweld_len_right_closing = $self->[_rweld_len_right_closing_];
- my $rweld_len_right_opening = $self->[_rweld_len_right_opening_];
+ # These are not yet used but could be useful
+ else {
+ if ( $token eq '?' ) {
+ $K_opening_ternary->{$type_sequence} = $KK_new;
+ }
+ elsif ( $token eq ':' ) {
+ $K_closing_ternary->{$type_sequence} = $KK_new;
+ }
+ else {
+ # shouldn't happen
+ Fault("Ugh: shouldn't happen");
+ }
+ }
+ }
- # Given the sequence number of a token, and the token or its type,
- # return the length of any weld to its right
+ my $type = $item->[_TYPE_];
- my $weld_len;
- if ($seqno) {
- if ( $is_closing_type{$type_or_tok} ) {
- $weld_len = $rweld_len_right_closing->{$seqno};
+ # trim comments
+ if ( $type eq '#' ) {
+ $item->[_TOKEN_] =~ s/\s*$//;
}
- elsif ( $is_opening_type{$type_or_tok} ) {
- $weld_len = $rweld_len_right_opening->{$seqno};
+
+ # Find the length of this token. Later it may be adjusted if phantom
+ # or ignoring side comment lengths.
+ my $token_length = $length_function->( $item->[_TOKEN_] );
+
+ # Mark length of side comments as just 1 if their lengths are ignored
+ if ( $type eq '#'
+ && $rOpts_ignore_side_comment_lengths
+ && ( !$CODE_type || $CODE_type eq 'HSC' ) )
+ {
+ $token_length = 1;
}
- }
- if ( !defined($weld_len) ) { $weld_len = 0 }
- return $weld_len;
-}
-sub weld_len_left_to_go {
- my ( $self, $i ) = @_;
+ $item->[_TOKEN_LENGTH_] = $token_length;
- # Given the index of a token in the 'to_go' array
- # return the length of any weld to its left
- return if ( $i < 0 );
- my $weld_len =
- $self->weld_len_left( $type_sequence_to_go[$i], $types_to_go[$i] );
- return $weld_len;
-}
+ # and update the cumulative length
+ $cumulative_length += $token_length;
-sub weld_len_right_to_go {
- my ( $self, $i ) = @_;
+ # Save the length sum to just AFTER this token
+ $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
- # Given the index of a token in the 'to_go' array
- # return the length of any weld to its right
- return if ( $i < 0 );
- if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
- my $weld_len =
- $self->weld_len_right( $type_sequence_to_go[$i], $types_to_go[$i] );
- return $weld_len;
-}
+ if ( $type && $type ne 'b' && $type ne '#' ) {
+ $last_nonblank_type = $type;
+ $last_nonblank_token = $item->[_TOKEN_];
+ $last_nonblank_block_type = $item->[_BLOCK_TYPE_];
+ $nonblank_token_count++;
-sub adjust_indentation_levels {
+ # count selected types
+ if ( $type =~ /^(=>|,)$/ ) {
+ my $seqno = $seqno_stack{ $depth_next - 1 };
+ if ( defined($seqno) ) {
+ $rtype_count_by_seqno->{$seqno}->{$type}++;
+ }
+ }
+ }
- my ($self) = @_;
+ # and finally, add this item to the new array
+ push @{$rLL_new}, $item;
+ };
- # Called once per file to do special indentation adjustments.
- # These routines adjust levels either by changing _CI_LEVEL_ directly or
- # by setting modified levels in the array $self->[_radjusted_levels_].
- # They will create this array if they are active, and otherwise it will be
- # an empty array for later efficiency.
+ my $store_token_and_space = sub {
+ my ( $item, $want_space ) = @_;
- # Set adjusted levels for any non-indenting braces.
- # If this option is used it will create the _radjusted_levels_ array.
- # Important: This must be the first routine called which touches
- # _radjusted_levels_
- $self->non_indenting_braces();
+ # store a token with preceding space if requested and needed
- # Adjust indentation for list containers
- $self->adjust_container_indentation();
+ # First store the space
+ if ( $want_space
+ && @{$rLL_new}
+ && $rLL_new->[-1]->[_TYPE_] ne 'b'
+ && $rOpts_add_whitespace )
+ {
+ my $rcopy = copy_token_as_type( $item, 'b', ' ' );
+ $rcopy->[_LINE_INDEX_] =
+ $rLL_new->[-1]->[_LINE_INDEX_];
+ $store_token->($rcopy);
+ }
- # Set adjusted levels for the whitespace cycle option.
- $self->whitespace_cycle_adjustment();
+ # then the token
+ $store_token->($item);
+ };
- # Adjust continuation indentation if -bli is set
- $self->bli_adjustment();
+ my $K_end_q = sub {
+ my ($KK) = @_;
+ my $K_end = $KK;
+ my $Kn = $self->K_next_nonblank($KK);
+ while ( defined($Kn) && $rLL->[$Kn]->[_TYPE_] eq 'q' ) {
+ $K_end = $Kn;
+ $Kn = $self->K_next_nonblank($Kn);
+ }
+ return $K_end;
+ };
- # Now clip any adjusted levels to be non-negative
- $self->clip_adjusted_levels();
+ my $add_phantom_semicolon = sub {
- return;
-}
+ my ($KK) = @_;
-sub initialize_adjusted_levels {
- my ($self) = @_;
+ my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
+ return unless ( defined($Kp) );
- # Initialize _radjusted_levels if it has not yet been initialized.
- # It is only needed when certain special adjustments are done.
- my $radjusted_levels = $self->[_radjusted_levels_];
- my $rLL = $self->[_rLL_];
- my $Kmax = @{$rLL} - 1;
- if ( !defined($radjusted_levels) || ( @{$radjusted_levels} != @{$rLL} ) ) {
- foreach my $KK ( 0 .. $Kmax ) {
- $radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_];
- }
- }
- return;
-}
+ # we are only adding semicolons for certain block types
+ my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
+ return
+ unless ( $ok_to_add_semicolon_for_block_type{$block_type}
+ || $block_type =~ /^(sub|package)/
+ || $block_type =~ /^\w+\:$/ );
-sub clip_adjusted_levels {
+ my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
- # Replace any negative adjusted levels with zero.
- # Negative levels can occur in files with brace errors.
- my ($self) = @_;
- my $radjusted_levels = $self->[_radjusted_levels_];
- return unless defined($radjusted_levels) && @{$radjusted_levels};
- foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) }
- return;
-}
+ my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
+ my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
-sub non_indenting_braces {
+ # Do not add a semicolon if...
+ return
+ if (
- # Called once per file to handle the --non-indenting-braces parameter.
- # Remove indentation within marked braces if requested
- # NOTE: This must be the first routine to reference $radjusted_levels;
- my ($self) = @_;
- return unless ( $rOpts->{'non-indenting-braces'} );
+ # it would follow a comment (and be isolated)
+ $previous_nonblank_type eq '#'
- my $rLL = $self->[_rLL_];
- return unless ( defined($rLL) && @{$rLL} );
+ # it follows a code block ( because they are not always wanted
+ # there and may add clutter)
+ || $rLL_new->[$Kp]->[_BLOCK_TYPE_]
- my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
+ # it would follow a label
+ || $previous_nonblank_type eq 'J'
- my $radjusted_levels;
- my $Kmax = @{$rLL} - 1;
- my @seqno_stack;
+ # it would be inside a 'format' statement (and cause syntax error)
+ || ( $previous_nonblank_type eq 'k'
+ && $previous_nonblank_token =~ /format/ )
- my $is_non_indenting_brace = sub {
- my ($KK) = @_;
+ # if it would prevent welding two containers
+ || $rpaired_to_inner_container->{$type_sequence}
- # looking for an opening block brace
- my $token = $rLL->[$KK]->[_TOKEN_];
- my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
- return unless ( $token eq '{' && $block_type );
+ );
- # followed by a comment
- my $K_sc = $self->K_next_nonblank($KK);
- return unless defined($K_sc);
- my $type_sc = $rLL->[$K_sc]->[_TYPE_];
- return unless ( $type_sc eq '#' );
+ # We will insert an empty semicolon here as a placeholder. Later, if
+ # it becomes the last token on a line, we will bring it to life. The
+ # advantage of doing this is that (1) we just have to check line
+ # endings, and (2) the phantom semicolon has zero width and therefore
+ # won't cause needless breaks of one-line blocks.
+ my $Ktop = -1;
+ if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
+ && $want_left_space{';'} == WS_NO )
+ {
- # on the same line
- my $line_index = $rLL->[$KK]->[_LINE_INDEX_];
- my $line_index_sc = $rLL->[$K_sc]->[_LINE_INDEX_];
- return unless ( $line_index_sc == $line_index );
+ # convert the blank into a semicolon..
+ # be careful: we are working on the new stack top
+ # on a token which has been stored.
+ my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' );
- # get the side comment text
- my $token_sc = $rLL->[$K_sc]->[_TOKEN_];
+ # Convert the existing blank to:
+ # a phantom semicolon for one_line_block option = 0 or 1
+ # a real semicolon for one_line_block option = 2
+ my $tok = '';
+ my $len_tok = 0;
+ if ( $rOpts_one_line_block_semicolons == 2 ) {
+ $tok = ';';
+ $len_tok = 1;
+ }
- # The pattern ends in \s but we have removed the newline, so
- # we added it back for the match. That way we require an exact
- # match to the special string and also allow additional text.
- $token_sc .= "\n";
- my $is_nib = ( $token_sc =~ /$non_indenting_brace_pattern/ );
- if ($is_nib) { $rspecial_side_comment_type->{$K_sc} = 'NIB' }
- return $is_nib;
- };
+ $rLL_new->[$Ktop]->[_TOKEN_] = $tok;
+ $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
+ $rLL_new->[$Ktop]->[_TYPE_] = ';';
+ $rLL_new->[$Ktop]->[_SLEVEL_] =
+ $rLL->[$KK]->[_SLEVEL_];
- foreach my $KK ( 0 .. $Kmax ) {
- my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
- my $level_abs = $rLL->[$KK]->[_LEVEL_];
- my $level = $level_abs;
- my $num = @seqno_stack;
- if ($seqno) {
- my $token = $rLL->[$KK]->[_TOKEN_];
- if ( $token eq '{' && $is_non_indenting_brace->($KK) ) {
- push @seqno_stack, $seqno;
- }
- if ( $token eq '}' && @seqno_stack && $seqno_stack[-1] == $seqno ) {
- pop @seqno_stack;
- $num -= 1;
- }
+ push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
+
+ # Then store a new blank
+ $store_token->($rcopy);
}
- $radjusted_levels->[$KK] = $level - $num;
- }
- $self->[_radjusted_levels_] = $radjusted_levels;
- return;
-}
+ else {
-sub whitespace_cycle_adjustment {
+ # insert a new token
+ my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
+ $rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_];
+ $store_token->($rcopy);
+ push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
+ }
+ };
- my $self = shift;
+ my $check_Q = sub {
- # Called once per file to implement the --whitespace-cycle option
- my $rLL = $self->[_rLL_];
- return unless ( defined($rLL) && @{$rLL} );
- my $radjusted_levels = $self->[_radjusted_levels_];
+ # Check that a quote looks okay
+ # This sub works but needs to by sync'd with the log file output
+ # before it can be used.
+ my ( $KK, $Kfirst, $line_number ) = @_;
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ $self->note_embedded_tab($line_number) if ( $token =~ "\t" );
- my $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
- if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
+ my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
+ return unless ( defined($Kp) );
+ my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
+ my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
- my $Kmax = @{$rLL} - 1;
+ my $previous_nonblank_type_2 = 'b';
+ my $previous_nonblank_token_2 = "";
+ my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
+ if ( defined($Kpp) ) {
+ $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
+ $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
+ }
- $self->initialize_adjusted_levels();
+ my $Kn = $self->K_next_nonblank($KK);
+ my $next_nonblank_token = "";
+ if ( defined($Kn) ) {
+ $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
+ }
- my $whitespace_last_level = -1;
- my @whitespace_level_stack = ();
- my $last_nonblank_type = 'b';
- my $last_nonblank_token = '';
- foreach my $KK ( 0 .. $Kmax ) {
- my $level_abs = $radjusted_levels->[$KK]; ##$rLL->[$KK]->[_LEVEL_];
- my $level = $level_abs;
- if ( $level_abs < $whitespace_last_level ) {
- pop(@whitespace_level_stack);
- }
- if ( !@whitespace_level_stack ) {
- push @whitespace_level_stack, $level_abs;
- }
- elsif ( $level_abs > $whitespace_last_level ) {
- $level = $whitespace_level_stack[-1] +
- ( $level_abs - $whitespace_last_level );
+ my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
+ my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
- if (
- # 1 Try to break at a block brace
- (
- $level > $rOpts_whitespace_cycle
- && $last_nonblank_type eq '{'
- && $last_nonblank_token eq '{'
- )
+ # make note of something like '$var = s/xxx/yyy/;'
+ # in case it should have been '$var =~ s/xxx/yyy/;'
+ if (
+ $token =~ /^(s|tr|y|m|\/)/
+ && $previous_nonblank_token =~ /^(=|==|!=)$/
- # 2 Then either a brace or bracket
- || ( $level > $rOpts_whitespace_cycle + 1
- && $last_nonblank_token =~ /^[\{\[]$/ )
+ # preceded by simple scalar
+ && $previous_nonblank_type_2 eq 'i'
+ && $previous_nonblank_token_2 =~ /^\$/
- # 3 Then a paren too
- || $level > $rOpts_whitespace_cycle + 2
- )
- {
- $level = 1;
- }
- push @whitespace_level_stack, $level;
- }
- $level = $whitespace_level_stack[-1];
- $radjusted_levels->[$KK] = $level;
+ # followed by some kind of termination
+ # (but give complaint if we can not see far enough ahead)
+ && $next_nonblank_token =~ /^[; \)\}]$/
- $whitespace_last_level = $level_abs;
- my $type = $rLL->[$KK]->[_TYPE_];
- my $token = $rLL->[$KK]->[_TOKEN_];
- if ( $type ne 'b' ) {
- $last_nonblank_type = $type;
- $last_nonblank_token = $token;
- }
+ # scalar is not declared
+ && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
+ )
+ {
+ my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
+ complain(
+"Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
+ );
}
- }
- $self->[_radjusted_levels_] = $radjusted_levels;
- return;
-}
+ };
-sub adjust_container_indentation {
+ # Main loop over all lines of the file
+ my $last_K_out;
- # Called once per file to implement the -bbhb* and related flags:
+ # Testing option to break qw. Do not use; it can make a mess.
+ my $ALLOW_BREAK_MULTILINE_QW = 0;
+ my $in_multiline_qw;
+ foreach my $line_of_tokens ( @{$rlines} ) {
- # -bbhbi=n
- # -bbsbi=n
- # -bbpi=n
+ my $input_line_number = $line_of_tokens->{_line_number};
+ my $last_line_type = $line_type;
+ $line_type = $line_of_tokens->{_line_type};
+ next unless ( $line_type eq 'CODE' );
+ my $last_CODE_type = $CODE_type;
+ $CODE_type = $line_of_tokens->{_code_type};
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ next unless defined($Kfirst);
- # where:
+ # Check for correct sequence of token indexes...
+ # An error here means that sub write_line() did not correctly
+ # package the tokenized lines as it received them.
+ if ( defined($last_K_out) ) {
+ if ( $Kfirst != $last_K_out + 1 ) {
+ Fault(
+ "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
+ );
+ }
+ }
+ else {
+ if ( $Kfirst != 0 ) {
+ Fault("Program Bug: first K is $Kfirst but should be 0");
+ }
+ }
+ $last_K_out = $Klast;
- # n=0 default indentation (usually one ci)
- # n=1 outdent one ci
- # n=2 indent one level (minus one ci)
- # n=3 indent one extra ci [This may be dropped]
+ # Handle special lines of code
+ if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
- my ($self) = @_;
+ # CODE_types are as follows.
+ # 'BL' = Blank Line
+ # 'VB' = Verbatim - line goes out verbatim
+ # 'FS' = Format Skipping - line goes out verbatim, no blanks
+ # 'IO' = Indent Only - only indentation may be changed
+ # 'NIN' = No Internal Newlines - line does not get broken
+ # 'HSC'=Hanging Side Comment - fix this hanging side comment
+ # 'BC'=Block Comment - an ordinary full line comment
+ # 'SBC'=Static Block Comment - a block comment which does not get
+ # indented
+ # 'SBCX'=Static Block Comment Without Leading Space
+ # 'VER'=VERSION statement
+ # '' or (undefined) - no restructions
- return unless %container_indentation_options;
+ # For a hanging side comment we insert an empty quote before
+ # the comment so that it becomes a normal side comment and
+ # will be aligned by the vertical aligner
+ if ( $CODE_type eq 'HSC' ) {
- my $rLL = $self->[_rLL_];
- return unless ( defined($rLL) && @{$rLL} );
+ # Safety Check: This must be a line with one token (a comment)
+ my $rtoken_vars = $rLL->[$Kfirst];
+ if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) {
- # Option 2 needs the following array:
- my $radjusted_levels = $self->[_radjusted_levels_];
+ # Note that even if the flag 'noadd-whitespace' is set, we
+ # will make an exception here and allow a blank to be
+ # inserted to push the comment to the right. We can think
+ # of this as an adjustment of indentation rather than
+ # whitespace between tokens. This will also prevent the
+ # hanging side comment from getting converted to a block
+ # comment if whitespace gets deleted, as for example with
+ # the -extrude and -mangle options.
+ my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' );
+ $store_token->($rcopy);
+ $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
+ $store_token->($rcopy);
+ $store_token->($rtoken_vars);
+ next;
+ }
+ else {
- # We will only initialize it if option 2 has been selected
- foreach my $key (%container_indentation_options) {
- my $val = $container_indentation_options{$key};
- if ( defined($val) && $val == 2 ) {
- $self->initialize_adjusted_levels();
- last;
+ # This line was mis-marked by sub scan_comment
+ Fault(
+ "Program bug. A hanging side comment has been mismarked"
+ );
+ }
+ }
+
+ # Copy tokens unchanged
+ foreach my $KK ( $Kfirst .. $Klast ) {
+ $store_token->( $rLL->[$KK] );
+ }
+ next;
}
- }
- # Loop over all opening container tokens
- my $K_opening_container = $self->[_K_opening_container_];
- my $ris_broken_container = $self->[_ris_broken_container_];
- foreach my $seqno ( keys %{$K_opening_container} ) {
- my $KK = $K_opening_container->{$seqno};
+ # Handle normal line..
- # this routine is not for code block braces
- my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
- next if ($block_type);
+ # Insert any essential whitespace between lines
+ # if last line was normal CODE.
+ # Patch for rt #125012: use K_previous_code rather than '_nonblank'
+ # because comments may disappear.
+ my $type_next = $rLL->[$Kfirst]->[_TYPE_];
+ my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
+ my $Kp = $self->K_previous_code( undef, $rLL_new );
+ if ( $last_line_type eq 'CODE'
+ && $type_next ne 'b'
+ && defined($Kp) )
+ {
+ my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
+ my $type_p = $rLL_new->[$Kp]->[_TYPE_];
- # These flags only apply if the corresponding -bb* flags
- # have been set to non-default values
- my $rtoken_vars = $rLL->[$KK];
- my $token = $rtoken_vars->[_TOKEN_];
- my $flag = $container_indentation_options{$token};
- next unless ($flag);
+ my ( $token_pp, $type_pp );
+ my $Kpp = $self->K_previous_code( $Kp, $rLL_new );
+ if ( defined($Kpp) ) {
+ $token_pp = $rLL_new->[$Kpp]->[_TOKEN_];
+ $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
+ }
+ else {
+ $token_pp = ";";
+ $type_pp = ';';
+ }
- # Require previous nonblank to be certain types (= and =>)
- # Note similar coding in sub insert_breaks_before...
- my $Kprev = $KK - 1;
- next if ( $Kprev < 0 );
- my $prev_type = $rLL->[$Kprev]->[_TYPE_];
- if ( $prev_type eq 'b' ) {
- $Kprev--;
- next if ( $Kprev < 0 );
- $prev_type = $rLL->[$Kprev]->[_TYPE_];
+ if (
+ is_essential_whitespace(
+ $token_pp, $type_pp, $token_p,
+ $type_p, $token_next, $type_next,
+ )
+ )
+ {
+
+ # Copy this first token as blank, but use previous line number
+ my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' );
+ $rcopy->[_LINE_INDEX_] =
+ $rLL_new->[-1]->[_LINE_INDEX_];
+ $store_token->($rcopy);
+ }
}
- next unless ( $is_equal_or_fat_comma{$prev_type} );
- # This is only for list containers
- next unless $self->is_list($seqno);
+ # loop to copy all tokens on this line, with any changes
+ my $type_sequence;
+ for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
+ $rtoken_vars = $rLL->[$KK];
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $type = $rtoken_vars->[_TYPE_];
+ my $last_type_sequence = $type_sequence;
+ $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- # and only for broken lists
- next unless $ris_broken_container->{$seqno};
+ # Handle a blank space ...
+ if ( $type eq 'b' ) {
- # NOTE: We are adjusting indentation of the opening container. The
- # closing container will normally follow the indentation of the opening
- # container automatically, so this is not currently done.
- my $ci = $rLL->[$KK]->[_CI_LEVEL_];
- next unless ($ci);
+ # Delete it if not wanted by whitespace rules
+ # or we are deleting all whitespace
+ # Note that whitespace flag is a flag indicating whether a
+ # white space BEFORE the token is needed
+ next if ( $KK >= $Klast ); # skip terminal blank
+ my $Knext = $KK + 1;
+ my $ws = $rwhitespace_flags->[$Knext];
+ if ( $ws == -1
+ || $rOpts_delete_old_whitespace )
+ {
- # option 1: outdent
- if ( $flag == 1 ) {
- $ci -= 1;
- }
+ # FIXME: maybe switch to using _new
+ my $Kp = $self->K_previous_nonblank($KK);
+ next unless defined($Kp);
+ my $token_p = $rLL->[$Kp]->[_TOKEN_];
+ my $type_p = $rLL->[$Kp]->[_TYPE_];
- # option 2: indent one level
- elsif ( $flag == 2 ) {
- $ci -= 1;
- $radjusted_levels->[$KK] += 1;
- }
+ my ( $token_pp, $type_pp );
- # option 3: for testing only, probably will be deleted
- elsif ( $flag == 3 ) {
- $ci += 1;
- }
- $rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 );
- }
- return;
-}
+ my $Kpp = $self->K_previous_nonblank($Kp);
+ if ( defined($Kpp) ) {
+ $token_pp = $rLL->[$Kpp]->[_TOKEN_];
+ $type_pp = $rLL->[$Kpp]->[_TYPE_];
+ }
+ else {
+ $token_pp = ";";
+ $type_pp = ';';
+ }
+ my $token_next = $rLL->[$Knext]->[_TOKEN_];
+ my $type_next = $rLL->[$Knext]->[_TYPE_];
-sub bli_adjustment {
+ my $do_not_delete = is_essential_whitespace(
+ $token_pp, $type_pp, $token_p,
+ $type_p, $token_next, $type_next,
+ );
- # Called once per file to implement the --brace-left-and-indent option.
- # If -bli is set, adds one continuation indentation for certain braces
- my $self = shift;
- return unless ( $rOpts->{'brace-left-and-indent'} );
- my $rLL = $self->[_rLL_];
- return unless ( defined($rLL) && @{$rLL} );
- my $KNEXT = 0;
- while ( defined($KNEXT) ) {
- my $KK = $KNEXT;
- $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
- my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
- if ( $block_type && $block_type =~ /$bli_pattern/ ) {
- $rLL->[$KK]->[_CI_LEVEL_]++;
- }
- }
- return;
-}
+ next unless ($do_not_delete);
+ }
-sub resync_lines_and_tokens {
+ # make it just one character if allowed
+ if ($rOpts_add_whitespace) {
+ $rtoken_vars->[_TOKEN_] = ' ';
+ }
+ $store_token->($rtoken_vars);
+ next;
+ }
- my $self = shift;
- my $rLL = $self->[_rLL_];
- my $Klimit = $self->[_Klimit_];
- my $rlines = $self->[_rlines_];
+ # Handle a nonblank token...
- # Re-construct the arrays of tokens associated with the original input lines
- # since they have probably changed due to inserting and deleting blanks
- # and a few other tokens.
+ # check for a qw quote
+ if ( $type eq 'q' ) {
- my $Kmax = -1;
+ # trim blanks from right of qw quotes
+ # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
+ # this)
+ $token =~ s/\s*$//;
+ $rtoken_vars->[_TOKEN_] = $token;
+ $self->note_embedded_tab($input_line_number)
+ if ( $token =~ "\t" );
- # This is the next token and its line index:
- my $Knext = 0;
- my $inext;
- if ( defined($rLL) && @{$rLL} ) {
- $Kmax = @{$rLL} - 1;
- $inext = $rLL->[$Knext]->[_LINE_INDEX_];
- }
+ if ($in_multiline_qw) {
- my $get_inext = sub {
- if ( $Knext < 0 || $Knext > $Kmax ) { $inext = undef }
- else {
- $inext = $rLL->[$Knext]->[_LINE_INDEX_];
- }
- return $inext;
- };
+ # If we are at the end of a multiline qw ..
+ if ( $in_multiline_qw == $KK ) {
- # Remember the most recently output token index
- my $Klast_out;
+ # Split off the closing delimiter character
+ # so that the formatter can put a line break there if necessary
+ my $part1 = $token;
+ my $part2 = substr( $part1, -1, 1, "" );
- my $iline = -1;
- foreach my $line_of_tokens ( @{$rlines} ) {
- $iline++;
- my $line_type = $line_of_tokens->{_line_type};
- if ( $line_type eq 'CODE' ) {
+ if ($part1) {
+ my $rcopy =
+ copy_token_as_type( $rtoken_vars, 'q', $part1 );
+ $store_token->($rcopy);
+ $token = $part2;
+ $rtoken_vars->[_TOKEN_] = $token;
- my @K_array;
- my $rK_range;
- $inext = $get_inext->();
- while ( defined($inext) && $inext <= $iline ) {
- push @{K_array}, $Knext;
- $Knext += 1;
- $inext = $get_inext->();
- }
+ }
+ $in_multiline_qw = undef;
- # Delete any terminal blank token
- if (@K_array) {
- if ( $rLL->[ $K_array[-1] ]->[_TYPE_] eq 'b' ) {
- pop @K_array;
+ # store without preceding blank
+ $store_token->($rtoken_vars);
+ next;
+ }
+ else {
+ # continuing a multiline qw
+ $store_token->($rtoken_vars);
+ next;
+ }
}
- }
- # Define the range of K indexes for the line:
- # $Kfirst = index of first token on line
- # $Klast_out = index of last token on line
- my ( $Kfirst, $Klast );
- if (@K_array) {
- $Kfirst = $K_array[0];
- $Klast = $K_array[-1];
- $Klast_out = $Klast;
- }
+ else {
- # It is only safe to trim the actual line text if the input
- # line had a terminal blank token. Otherwise, we may be
- # in a quote.
- if ( $line_of_tokens->{_ended_in_blank_token} ) {
- $line_of_tokens->{_line_text} =~ s/\s+$//;
- }
- $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
+ # we are encountered new qw token...see if multiline
+ my $K_end = $K_end_q->($KK);
+ if ( $ALLOW_BREAK_MULTILINE_QW && $K_end != $KK ) {
- # Deleting semicolons can create new empty code lines
- # which should be marked as blank
- if ( !defined($Kfirst) ) {
- my $code_type = $line_of_tokens->{_code_type};
- if ( !$code_type ) {
- $line_of_tokens->{_code_type} = 'BL';
- }
- }
- }
- }
+ # Starting multiline qw...
+ # set flag equal to the ending K
+ $in_multiline_qw = $K_end;
- # There shouldn't be any nodes beyond the last one unless we start
- # allowing 'link_after' calls
- if ( defined($inext) ) {
+ # Split off the leading part
+ # so that the formatter can put a line break there if necessary
+ if ( $token =~ /^(qw\s*.)(.*)$/ ) {
+ my $part1 = $1;
+ my $part2 = $2;
+ if ($part2) {
+ my $rcopy =
+ copy_token_as_type( $rtoken_vars, 'q',
+ $part1 );
+ $store_token_and_space->(
+ $rcopy, $rwhitespace_flags->[$KK] == WS_YES
+ );
+ $token = $part2;
+ $rtoken_vars->[_TOKEN_] = $token;
- Fault("unexpected tokens at end of file when reconstructing lines");
- }
+ # Second part goes without intermediate blank
+ $store_token->($rtoken_vars);
+ next;
+ }
+ }
+ }
+ else {
- return;
-}
+ # this is a new single token qw -
+ # store with possible preceding blank
+ $store_token_and_space->(
+ $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
+ );
+ next;
+ }
+ }
+ } ## end if ( $type eq 'q' )
-sub dump_verbatim {
- my $self = shift;
- my $rlines = $self->[_rlines_];
- foreach my $line ( @{$rlines} ) {
- my $input_line = $line->{_line_text};
- $self->write_unindented_line($input_line);
- }
- return;
-}
+ # Modify certain tokens here for whitespace
+ # The following is not yet done, but could be:
+ # sub (x x x)
+ elsif ( $type =~ /^[wit]$/ ) {
-sub finish_formatting {
+ # Examples: <<snippets/space1.in>>
+ # change '$ var' to '$var' etc
+ # '-> new' to '->new'
+ if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
+ $token =~ s/\s*//g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
- my ( $self, $severe_error ) = @_;
+ # Split identifiers with leading arrows, inserting blanks if
+ # necessary. It is easier and safer here than in the
+ # tokenizer. For example '->new' becomes two tokens, '->' and
+ # 'new' with a possible blank between.
+ #
+ # Note: there is a related patch in sub set_whitespace_flags
+ if ( $token =~ /^\-\>(.*)$/ && $1 ) {
+ my $token_save = $1;
+ my $type_save = $type;
- # The file has been tokenized and is ready to be formatted.
- # All of the relevant data is stored in $self, ready to go.
+ # store a blank to left of arrow if necessary
+ my $Kprev = $self->K_previous_nonblank($KK);
+ if ( defined($Kprev)
+ && $rLL->[$Kprev]->[_TYPE_] ne 'b'
+ && $rOpts_add_whitespace
+ && $want_left_space{'->'} == WS_YES )
+ {
+ my $rcopy =
+ copy_token_as_type( $rtoken_vars, 'b', ' ' );
+ $store_token->($rcopy);
+ }
- # output file verbatim if severe error or no formatting requested
- if ( $severe_error || $rOpts->{notidy} ) {
- $self->dump_verbatim();
- $self->wrapup();
- return;
- }
+ # then store the arrow
+ my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
+ $store_token->($rcopy);
- # Make a pass through the lines, looking at lines of CODE and identifying
- # special processing needs, such format skipping sections marked by
- # special comments
- $self->scan_comments();
+ # store a blank after the arrow if requested
+ # added for issue git #33
+ if ( $want_right_space{'->'} == WS_YES ) {
+ my $rcopy =
+ copy_token_as_type( $rtoken_vars, 'b', ' ' );
+ $store_token->($rcopy);
+ }
- # Find nested pairs of container tokens for any welding. This information
- # is also needed for adding semicolons, so it is split apart from the
- # welding step.
- $self->find_nested_pairs();
+ # then reset the current token to be the remainder,
+ # and reset the whitespace flag according to the arrow
+ $token = $rtoken_vars->[_TOKEN_] = $token_save;
+ $type = $rtoken_vars->[_TYPE_] = $type_save;
+ $store_token->($rtoken_vars);
+ next;
+ }
- # Make sure everything looks good
- $self->check_line_hashes();
+ if ( $token =~ /$ANYSUB_PATTERN/ ) {
- # Future: Place to Begin future Iteration Loop
- # foreach my $it_count(1..$maxit) {
+ # -spp = 0 : no space before opening prototype paren
+ # -spp = 1 : stable (follow input spacing)
+ # -spp = 2 : always space before opening prototype paren
+ my $spp = $rOpts->{'space-prototype-paren'};
+ if ( defined($spp) ) {
+ if ( $spp == 0 ) { $token =~ s/\s+\(/\(/; }
+ elsif ( $spp == 2 ) { $token =~ s/\(/ (/; }
+ }
- # Future: We must reset some things after the first iteration.
- # This includes:
- # - resetting levels if there was any welding
- # - resetting any phantom semicolons
- # - dealing with any line numbering issues so we can relate final lines
- # line numbers with input line numbers.
- #
- # If ($it_count>1) {
- # Copy {level_raw} to [_LEVEL_] if ($it_count>1)
- # Renumber lines
- # }
+ # one space max, and no tabs
+ $token =~ s/\s+/ /g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
- # Make a pass through all tokens, adding or deleting any whitespace as
- # required. Also make any other changes, such as adding semicolons.
- # All token changes must be made here so that the token data structure
- # remains fixed for the rest of this iteration.
- $self->respace_tokens();
+ # trim identifiers of trailing blanks which can occur
+ # under some unusual circumstances, such as if the
+ # identifier 'witch' has trailing blanks on input here:
+ #
+ # sub
+ # witch
+ # () # prototype may be on new line ...
+ # ...
+ if ( $type eq 'i' ) {
+ $token =~ s/\s+$//g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
+ }
- # Implement any welding needed for the -wn or -cb options
- $self->weld_containers();
+ # change 'LABEL :' to 'LABEL:'
+ elsif ( $type eq 'J' ) {
+ $token =~ s/\s+//g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
- # Locate small nested blocks which should not be broken
- $self->mark_short_nested_blocks();
+ # patch to add space to something like "x10"
+ # This avoids having to split this token in the pre-tokenizer
+ elsif ( $type eq 'n' ) {
+ if ( $token =~ /^x\d+/ ) {
+ $token =~ s/x/x /;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
+ }
- $self->adjust_indentation_levels();
+ # check a quote for problems
+ elsif ( $type eq 'Q' ) {
+ $check_Q->( $KK, $Kfirst, $input_line_number );
+ }
- # Finishes formatting and write the result to the line sink.
- # Eventually this call should just change the 'rlines' data according to the
- # new line breaks and then return so that we can do an internal iteration
- # before continuing with the next stages of formatting.
- $self->process_all_lines();
+ # handle semicolons
+ elsif ( $type eq ';' ) {
- ############################################################
- # A possible future decomposition of 'process_all_lines()' follows.
- # Benefits:
- # - allow perltidy to do an internal iteration which eliminates
- # many unnecessary steps, such as re-parsing and vertical alignment.
- # This will allow iterations to be automatic.
- # - consolidate all length calculations to allow utf8 alignment
- ############################################################
+ # Remove unnecessary semicolons, but not after bare
+ # blocks, where it could be unsafe if the brace is
+ # mistokenized.
+ if (
+ $rOpts->{'delete-semicolons'}
+ && (
+ (
+ $last_nonblank_type eq '}'
+ && (
+ $is_block_without_semicolon{
+ $last_nonblank_block_type}
+ || $last_nonblank_block_type =~ /$SUB_PATTERN/
+ || $last_nonblank_block_type =~ /^\w+:$/ )
+ )
+ || $last_nonblank_type eq ';'
+ )
+ )
+ {
- # Future: Check for convergence of beginning tokens on CODE lines
+ # This looks like a deletable semicolon, but even if a
+ # semicolon can be deleted it is necessarily best to do so.
+ # We apply these additional rules for deletion:
+ # - Always ok to delete a ';' at the end of a line
+ # - Never delete a ';' before a '#' because it would
+ # promote it to a block comment.
+ # - If a semicolon is not at the end of line, then only
+ # delete if it is followed by another semicolon or closing
+ # token. This includes the comment rule. It may take
+ # two passes to get to a final state, but it is a little
+ # safer. For example, keep the first semicolon here:
+ # eval { sub bubba { ok(0) }; ok(0) } || ok(1);
+ # It is not required but adds some clarity.
+ my $ok_to_delete = 1;
+ if ( $KK < $Klast ) {
+ my $Kn = $self->K_next_nonblank($KK);
+ if ( defined($Kn) && $Kn <= $Klast ) {
+ my $next_nonblank_token_type =
+ $rLL->[$Kn]->[_TYPE_];
+ $ok_to_delete = $next_nonblank_token_type eq ';'
+ || $next_nonblank_token_type eq '}';
+ }
+ }
- # Future: End of Iteration Loop
+ # do not delete only nonblank token in a file
+ else {
+ my $Kn = $self->K_next_nonblank($KK);
+ $ok_to_delete = defined($Kn) || $nonblank_token_count;
+ }
- # Future: add_padding($rargs);
+ if ($ok_to_delete) {
+ $self->note_deleted_semicolon($input_line_number);
+ next;
+ }
+ else {
+ write_logfile_entry("Extra ';'\n");
+ }
+ }
+ }
- # Future: add_closing_side_comments($rargs);
+ elsif ($type_sequence) {
- # Future: vertical_alignment($rargs);
+ if ( $is_opening_token{$token} ) {
+ my $seqno_parent = $seqno_stack{ $depth_next - 1 };
+ $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
+ push @{ $rchildren_of_seqno->{$seqno_parent} },
+ $type_sequence;
+ $rparent_of_seqno->{$type_sequence} = $seqno_parent;
+ $seqno_stack{$depth_next} = $type_sequence;
+ $KK_stack{$depth_next} = $KK;
+ $depth_next++;
+ }
+ elsif ( $is_closing_token{$token} ) {
+ $depth_next--;
- # Future: output results
+ # keep track of broken lists for later formatting
+ my $seqno_test = $seqno_stack{$depth_next};
+ my $KK_open = $KK_stack{$depth_next};
+ my $seqno_outer = $seqno_stack{ $depth_next - 1 };
+ if ( defined($seqno_test)
+ && defined($KK_open)
+ && $seqno_test == $type_sequence )
+ {
+ my $lx_open = $rLL->[$KK_open]->[_LINE_INDEX_];
+ my $lx_close = $rLL->[$KK]->[_LINE_INDEX_];
+ if ( $lx_open < $lx_close ) {
+ $ris_broken_container->{$type_sequence} =
+ $lx_close - $lx_open;
+ if ( defined($seqno_outer) ) {
+ $rhas_broken_container->{$seqno_outer} = 1;
+ }
+ }
+ }
- # A final routine to tie up any loose ends
- $self->wrapup();
- return;
-}
+ # Insert a tentative missing semicolon if the next token is
+ # a closing block brace
+ if (
+ $type eq '}'
+ && $token eq '}'
-sub get_spaces {
+ # not preceded by a ';'
+ && $last_nonblank_type ne ';'
- # return the number of leading spaces associated with an indentation
- # variable $indentation is either a constant number of spaces or an object
- # with a get_spaces method.
- my $indentation = shift;
- return ref($indentation) ? $indentation->get_spaces() : $indentation;
-}
+ # and this is not a VERSION stmt (is all one line, we are not
+ # inserting semicolons on one-line blocks)
+ && $CODE_type ne 'VER'
-sub get_recoverable_spaces {
+ # and we are allowed to add semicolons
+ && $rOpts->{'add-semicolons'}
+ )
+ {
+ $add_phantom_semicolon->($KK);
+ }
+ }
+ }
- # return the number of spaces (+ means shift right, - means shift left)
- # that we would like to shift a group of lines with the same indentation
- # to get them to line up with their opening parens
- my $indentation = shift;
- return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
-}
+ # Store this token with possible previous blank
+ $store_token_and_space->(
+ $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
+ );
-sub get_available_spaces_to_go {
+ } # End token loop
+ } # End line loop
- my ( $self, $ii ) = @_;
- my $item = $leading_spaces_to_go[$ii];
+ # Reset memory to be the new array
+ $self->[_rLL_] = $rLL_new;
+ $self->set_rLL_max_index();
+ $self->[_K_opening_container_] = $K_opening_container;
+ $self->[_K_closing_container_] = $K_closing_container;
+ $self->[_K_opening_ternary_] = $K_opening_ternary;
+ $self->[_K_closing_ternary_] = $K_closing_ternary;
+ $self->[_rK_phantom_semicolons_] = $rK_phantom_semicolons;
+ $self->[_rtype_count_by_seqno_] = $rtype_count_by_seqno;
+ $self->[_ris_broken_container_] = $ris_broken_container;
+ $self->[_rhas_broken_container_] = $rhas_broken_container;
+ $self->[_rparent_of_seqno_] = $rparent_of_seqno;
+ $self->[_rchildren_of_seqno_] = $rchildren_of_seqno;
- # return the number of available leading spaces associated with an
- # indentation variable. $indentation is either a constant number of
- # spaces or an object with a get_available_spaces method.
- return ref($item) ? $item->get_available_spaces() : 0;
-}
+ # make sure the new array looks okay
+ $self->check_token_array();
-{ ## begin closure set_leading_whitespace (for -lp indentation)
+ # reset the token limits of each line
+ $self->resync_lines_and_tokens();
- # These routines are called batch-by-batch to handle the -lp indentation
- # option. The coding is rather complex, but is only for -lp.
+ return;
+}
- my $gnu_position_predictor;
- my $gnu_sequence_number;
- my $line_start_index_to_go;
- my $max_gnu_item_index;
- my $max_gnu_stack_index;
- my %gnu_arrow_count;
- my %gnu_comma_count;
- my %last_gnu_equals;
- my @gnu_item_list;
- my @gnu_stack;
+sub copy_token_as_type {
- sub initialize_gnu_vars {
+ # This provides a quick way to create a new token by
+ # slightly modifying an existing token.
+ my ( $rold_token, $type, $token ) = @_;
+ if ( $type eq 'b' ) {
+ $token = " " unless defined($token);
+ }
+ elsif ( $type eq 'q' ) {
+ $token = '' unless defined($token);
+ }
+ elsif ( $type eq '->' ) {
+ $token = '->' unless defined($token);
+ }
+ elsif ( $type eq ';' ) {
+ $token = ';' unless defined($token);
+ }
+ else {
+ Fault(
+"Programming error: copy_token_as has type $type but should be 'b' or 'q'"
+ );
+ }
- # initialize gnu variables for a new file;
- # must be called once at the start of a new file.
+ my $rnew_token = [ map { $_ } @{$rold_token} ];
+ $rnew_token->[_TYPE_] = $type;
+ $rnew_token->[_TOKEN_] = $token;
+ $rnew_token->[_BLOCK_TYPE_] = '';
+ $rnew_token->[_CONTAINER_TYPE_] = '';
+ $rnew_token->[_CONTAINER_ENVIRONMENT_] = '';
+ $rnew_token->[_TYPE_SEQUENCE_] = '';
+ return $rnew_token;
+}
- # initialize the leading whitespace stack to negative levels
- # so that we can never run off the end of the stack
- $gnu_position_predictor =
- 0; # where the current token is predicted to be
- $max_gnu_stack_index = 0;
- $max_gnu_item_index = -1;
- $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
- @gnu_item_list = ();
- return;
- }
+sub Debug_dump_tokens {
- sub initialize_gnu_batch_vars {
+ # a debug routine, not normally used
+ my ( $self, $msg ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $nvars = @{$rLL};
+ print STDERR "$msg\n";
+ print STDERR "ntokens=$nvars\n";
+ print STDERR "K\t_TOKEN_\t_TYPE_\n";
+ my $K = 0;
- # initialize gnu variables for a new batch;
- # must be called before each new batch
- $gnu_sequence_number++; # increment output batch counter
- %last_gnu_equals = ();
- %gnu_comma_count = ();
- %gnu_arrow_count = ();
- $line_start_index_to_go = 0;
- $max_gnu_item_index = UNDEFINED_INDEX;
- return;
+ foreach my $item ( @{$rLL} ) {
+ print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
+ $K++;
}
+ return;
+}
- sub new_lp_indentation_item {
-
- # this is an interface to the IndentationItem class
- my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
+sub K_next_code {
+ my ( $self, $KK, $rLL ) = @_;
- # A negative level implies not to store the item in the item_list
- my $index = 0;
- if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
+ # return the index K of the next nonblank, non-comment token
+ return unless ( defined($KK) && $KK >= 0 );
- my $starting_index_K = 0;
- if ( defined($line_start_index_to_go)
- && $line_start_index_to_go >= 0
- && $line_start_index_to_go <= $max_index_to_go )
+ # use the standard array unless given otherwise
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ my $Knnb = $KK + 1;
+ while ( $Knnb < $Num ) {
+ if ( !defined( $rLL->[$Knnb] ) ) {
+ Fault("Undefined entry for k=$Knnb");
+ }
+ if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
+ && $rLL->[$Knnb]->[_TYPE_] ne '#' )
{
- $starting_index_K = $K_to_go[$line_start_index_to_go];
+ return $Knnb;
}
+ $Knnb++;
+ }
+ return;
+}
- my $item = Perl::Tidy::IndentationItem->new(
- spaces => $spaces,
- level => $level,
- ci_level => $ci_level,
- available_spaces => $available_spaces,
- index => $index,
- gnu_sequence_number => $gnu_sequence_number,
- align_paren => $align_paren,
- stack_depth => $max_gnu_stack_index,
- starting_index_K => $starting_index_K,
- );
+sub K_next_nonblank {
+ my ( $self, $KK, $rLL ) = @_;
- if ( $level >= 0 ) {
- $gnu_item_list[$max_gnu_item_index] = $item;
- }
+ # return the index K of the next nonblank token
+ return unless ( defined($KK) && $KK >= 0 );
- return $item;
+ # use the standard array unless given otherwise
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ my $Knnb = $KK + 1;
+ while ( $Knnb < $Num ) {
+ if ( !defined( $rLL->[$Knnb] ) ) {
+ Fault("Undefined entry for k=$Knnb");
+ }
+ if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
+ $Knnb++;
}
+ return;
+}
- sub set_leading_whitespace {
+sub K_previous_code {
- # This routine defines leading whitespace for the case of -lp formatting
- # given: the level and continuation_level of a token,
- # define: space count of leading string which would apply if it
- # were the first token of a new line.
+ # return the index K of the previous nonblank, non-comment token
+ # Call with $KK=undef to start search at the top of the array
+ my ( $self, $KK, $rLL ) = @_;
- my ( $self, $Kj, $K_last_nonblank, $K_last_last_nonblank,
- $level_abs, $ci_level, $in_continued_quote )
- = @_;
+ # use the standard array unless given otherwise
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ if ( !defined($KK) ) { $KK = $Num }
+ elsif ( $KK > $Num ) {
- return unless ($rOpts_line_up_parentheses);
- return unless ( defined($max_index_to_go) && $max_index_to_go >= 0 );
+ # The caller should make the first call with KK_new=undef to
+ # avoid this error
+ Fault(
+"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
+ );
+ }
+ my $Kpnb = $KK - 1;
+ while ( $Kpnb >= 0 ) {
+ if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
+ && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
+ {
+ return $Kpnb;
+ }
+ $Kpnb--;
+ }
+ return;
+}
- my $rbreak_container = $self->[_rbreak_container_];
- my $rshort_nested = $self->[_rshort_nested_];
- my $rLL = $self->[_rLL_];
+sub K_previous_nonblank {
- # find needed previous nonblank tokens
- my $last_nonblank_token = '';
- my $last_nonblank_type = '';
- my $last_nonblank_block_type = '';
+ # return index of previous nonblank token before item K;
+ # Call with $KK=undef to start search at the top of the array
+ my ( $self, $KK, $rLL ) = @_;
- # and previous nonblank tokens, just in this batch:
- my $last_nonblank_token_in_batch = '';
- my $last_nonblank_type_in_batch = '';
- my $last_last_nonblank_type_in_batch = '';
+ # use the standard array unless given otherwise
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ if ( !defined($KK) ) { $KK = $Num }
+ elsif ( $KK > $Num ) {
- if ( defined($K_last_nonblank) ) {
- $last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
- $last_nonblank_type = $rLL->[$K_last_nonblank]->[_TYPE_];
- $last_nonblank_block_type =
- $rLL->[$K_last_nonblank]->[_BLOCK_TYPE_];
+ # The caller should make the first call with KK_new=undef to
+ # avoid this error
+ Fault(
+"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
+ );
+ }
+ my $Kpnb = $KK - 1;
+ while ( $Kpnb >= 0 ) {
+ if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
+ $Kpnb--;
+ }
+ return;
+}
- if ( $K_last_nonblank >= $K_to_go[0] ) {
- $last_nonblank_token_in_batch = $last_nonblank_token;
- $last_nonblank_type_in_batch = $last_nonblank_type;
- if ( defined($K_last_last_nonblank)
- && $K_last_last_nonblank > $K_to_go[0] )
- {
- $last_last_nonblank_type_in_batch =
- $rLL->[$K_last_last_nonblank]->[_TYPE_];
- }
- }
- }
+sub get_old_line_index {
- ################################################################
+ # return index of the original line that token K was on
+ my ( $self, $K ) = @_;
+ my $rLL = $self->[_rLL_];
+ return 0 unless defined($K);
+ return $rLL->[$K]->[_LINE_INDEX_];
+}
- # Adjust levels if necessary to recycle whitespace:
- my $level = $level_abs;
- my $radjusted_levels = $self->[_radjusted_levels_];
- my $nK = @{$rLL};
- my $nws = @{$radjusted_levels};
- if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
- $level = $radjusted_levels->[$Kj];
- if ( $level < 0 ) { $level = 0 } # note: this should not happen
- }
+sub get_old_line_count {
- # The continued_quote flag means that this is the first token of a
- # line, and it is the continuation of some kind of multi-line quote
- # or pattern. It requires special treatment because it must have no
- # added leading whitespace. So we create a special indentation item
- # which is not in the stack.
- if ($in_continued_quote) {
- my $space_count = 0;
- my $available_space = 0;
- $level = -1; # flag to prevent storing in item_list
- $leading_spaces_to_go[$max_index_to_go] =
- $reduced_spaces_to_go[$max_index_to_go] =
- new_lp_indentation_item( $space_count, $level, $ci_level,
- $available_space, 0 );
- return;
- }
+ # return number of input lines separating two tokens
+ my ( $self, $Kbeg, $Kend ) = @_;
+ my $rLL = $self->[_rLL_];
+ return 0 unless defined($Kbeg);
+ return 0 unless defined($Kend);
+ return $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_] + 1;
+}
- # get the top state from the stack
- my $space_count = $gnu_stack[$max_gnu_stack_index]->get_spaces();
- my $current_level = $gnu_stack[$max_gnu_stack_index]->get_level();
- my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
+sub is_list {
- my $type = $types_to_go[$max_index_to_go];
- my $token = $tokens_to_go[$max_index_to_go];
- my $total_depth = $nesting_depth_to_go[$max_index_to_go];
+ # Return true if the immediate contents of a container appears to be a
+ # list.
- if ( $type eq '{' || $type eq '(' ) {
+ my ( $self, $seqno ) = @_;
+ return unless defined($seqno);
- $gnu_comma_count{ $total_depth + 1 } = 0;
- $gnu_arrow_count{ $total_depth + 1 } = 0;
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_opening = $K_opening_container->{$seqno};
+ return unless ( defined($K_opening) );
- # If we come to an opening token after an '=' token of some type,
- # see if it would be helpful to 'break' after the '=' to save space
- my $last_equals = $last_gnu_equals{$total_depth};
- if ( $last_equals && $last_equals > $line_start_index_to_go ) {
+ my $rLL = $self->[_rLL_];
+ my $block_type = $rLL->[$K_opening]->[_BLOCK_TYPE_];
+ return if ($block_type);
- # find the position if we break at the '='
- my $i_test = $last_equals;
- if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
+ my $token = $rLL->[$K_opening]->[_TOKEN_];
+ return if ( $token eq ':' );
- # TESTING
- ##my $too_close = ($i_test==$max_index_to_go-1);
+ # We will require at least 2 commas or 1 fat comma in the
+ # immediate lower level.
+ my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
+ my $fat_comma_count = $rtype_count_by_seqno->{$seqno}->{'=>'};
+ my $comma_count = $rtype_count_by_seqno->{$seqno}->{','};
+ my $is_list = ( $fat_comma_count || $comma_count && $comma_count > 1 );
+ return $is_list;
+}
- my $test_position =
- total_line_length( $i_test, $max_index_to_go );
- my $mll = maximum_line_length($i_test);
+sub resync_lines_and_tokens {
- if (
+ my $self = shift;
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $rlines = $self->[_rlines_];
- # the equals is not just before an open paren (testing)
- ##!$too_close &&
+ # Re-construct the arrays of tokens associated with the original input lines
+ # since they have probably changed due to inserting and deleting blanks
+ # and a few other tokens.
- # if we are beyond the midpoint
- $gnu_position_predictor >
- $mll - $rOpts_maximum_line_length / 2
+ my $Kmax = -1;
- # or we are beyond the 1/4 point and there was an old
- # break at the equals
- || (
- $gnu_position_predictor >
- $mll - $rOpts_maximum_line_length * 3 / 4
- && (
- $old_breakpoint_to_go[$last_equals]
- || ( $last_equals > 0
- && $old_breakpoint_to_go[ $last_equals - 1 ] )
- || ( $last_equals > 1
- && $types_to_go[ $last_equals - 1 ] eq 'b'
- && $old_breakpoint_to_go[ $last_equals - 2 ] )
- )
- )
- )
- {
+ # This is the next token and its line index:
+ my $Knext = 0;
+ my $inext;
+ if ( defined($rLL) && @{$rLL} ) {
+ $Kmax = @{$rLL} - 1;
+ $inext = $rLL->[$Knext]->[_LINE_INDEX_];
+ }
- # then make the switch -- note that we do not set a real
- # breakpoint here because we may not really need one; sub
- # scan_list will do that if necessary
- $line_start_index_to_go = $i_test + 1;
- $gnu_position_predictor = $test_position;
- }
- }
+ my $get_inext = sub {
+ if ( $Knext < 0 || $Knext > $Kmax ) { $inext = undef }
+ else {
+ $inext = $rLL->[$Knext]->[_LINE_INDEX_];
}
+ return $inext;
+ };
- my $halfway =
- maximum_line_length_for_level($level) -
- $rOpts_maximum_line_length / 2;
+ # Remember the most recently output token index
+ my $Klast_out;
- # Check for decreasing depth ..
- # Note that one token may have both decreasing and then increasing
- # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
- # in this example we would first go back to (1,0) then up to (2,0)
- # in a single call.
- if ( $level < $current_level || $ci_level < $current_ci_level ) {
+ my $iline = -1;
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ $iline++;
+ my $line_type = $line_of_tokens->{_line_type};
+ if ( $line_type eq 'CODE' ) {
- # loop to find the first entry at or completely below this level
- my ( $lev, $ci_lev );
- while (1) {
- if ($max_gnu_stack_index) {
+ my @K_array;
+ my $rK_range;
+ $inext = $get_inext->();
+ while ( defined($inext) && $inext <= $iline ) {
+ push @{K_array}, $Knext;
+ $Knext += 1;
+ $inext = $get_inext->();
+ }
- # save index of token which closes this level
- $gnu_stack[$max_gnu_stack_index]
- ->set_closed($max_index_to_go);
+ # Delete any terminal blank token
+ if (@K_array) {
+ if ( $rLL->[ $K_array[-1] ]->[_TYPE_] eq 'b' ) {
+ pop @K_array;
+ }
+ }
- # Undo any extra indentation if we saw no commas
- my $available_spaces =
- $gnu_stack[$max_gnu_stack_index]->get_available_spaces();
+ # Define the range of K indexes for the line:
+ # $Kfirst = index of first token on line
+ # $Klast_out = index of last token on line
+ my ( $Kfirst, $Klast );
+ if (@K_array) {
+ $Kfirst = $K_array[0];
+ $Klast = $K_array[-1];
+ $Klast_out = $Klast;
+ }
- my $comma_count = 0;
- my $arrow_count = 0;
- if ( $type eq '}' || $type eq ')' ) {
- $comma_count = $gnu_comma_count{$total_depth};
- $arrow_count = $gnu_arrow_count{$total_depth};
- $comma_count = 0 unless $comma_count;
- $arrow_count = 0 unless $arrow_count;
- }
- $gnu_stack[$max_gnu_stack_index]
- ->set_comma_count($comma_count);
- $gnu_stack[$max_gnu_stack_index]
- ->set_arrow_count($arrow_count);
+ # It is only safe to trim the actual line text if the input
+ # line had a terminal blank token. Otherwise, we may be
+ # in a quote.
+ if ( $line_of_tokens->{_ended_in_blank_token} ) {
+ $line_of_tokens->{_line_text} =~ s/\s+$//;
+ }
+ $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
- if ( $available_spaces > 0 ) {
+ # Deleting semicolons can create new empty code lines
+ # which should be marked as blank
+ if ( !defined($Kfirst) ) {
+ my $code_type = $line_of_tokens->{_code_type};
+ if ( !$code_type ) {
+ $line_of_tokens->{_code_type} = 'BL';
+ }
+ }
+ }
+ }
- if ( $comma_count <= 0 || $arrow_count > 0 ) {
+ # There shouldn't be any nodes beyond the last one unless we start
+ # allowing 'link_after' calls
+ if ( defined($inext) ) {
- my $i =
- $gnu_stack[$max_gnu_stack_index]->get_index();
- my $seqno =
- $gnu_stack[$max_gnu_stack_index]
- ->get_sequence_number();
+ Fault("unexpected tokens at end of file when reconstructing lines");
+ }
- # Be sure this item was created in this batch. This
- # should be true because we delete any available
- # space from open items at the end of each batch.
- if ( $gnu_sequence_number != $seqno
- || $i > $max_gnu_item_index )
- {
- warning(
-"Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
- );
- report_definite_bug();
- }
+ return;
+}
- else {
- if ( $arrow_count == 0 ) {
- $gnu_item_list[$i]
- ->permanently_decrease_available_spaces(
- $available_spaces);
- }
- else {
- $gnu_item_list[$i]
- ->tentatively_decrease_available_spaces(
- $available_spaces);
- }
- foreach my $j ( $i + 1 .. $max_gnu_item_index )
- {
- $gnu_item_list[$j]
- ->decrease_SPACES($available_spaces);
- }
- }
- }
- }
+sub weld_containers {
- # go down one level
- --$max_gnu_stack_index;
- $lev = $gnu_stack[$max_gnu_stack_index]->get_level();
- $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
+ # Called once per file to do any welding operations requested by --weld*
+ # flags.
+ my ($self) = @_;
- # stop when we reach a level at or below the current level
- if ( $lev <= $level && $ci_lev <= $ci_level ) {
- $space_count =
- $gnu_stack[$max_gnu_stack_index]->get_spaces();
- $current_level = $lev;
- $current_ci_level = $ci_lev;
- last;
- }
- }
+ return if ( $rOpts->{'indent-only'} );
+ return unless ( $rOpts->{'add-newlines'} );
- # reached bottom of stack .. should never happen because
- # only negative levels can get here, and $level was forced
- # to be positive above.
- else {
- warning(
-"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
- );
- report_definite_bug();
- last;
- }
- }
- }
+ if ( $rOpts->{'weld-nested-containers'} ) {
- # handle increasing depth
- if ( $level > $current_level || $ci_level > $current_ci_level ) {
+ # if called, weld_nested_containers must be called before other weld
+ # operations. # This is because weld_nested_containers could overwrite
+ # hash values written by weld_cuddled_blocks and weld_nested_quotes.
+ $self->weld_nested_containers();
- # Compute the standard incremental whitespace. This will be
- # the minimum incremental whitespace that will be used. This
- # choice results in a smooth transition between the gnu-style
- # and the standard style.
- my $standard_increment =
- ( $level - $current_level ) *
- $rOpts_indent_columns +
- ( $ci_level - $current_ci_level ) *
- $rOpts_continuation_indentation;
+ $self->weld_nested_quotes();
+ }
- # Now we have to define how much extra incremental space
- # ("$available_space") we want. This extra space will be
- # reduced as necessary when long lines are encountered or when
- # it becomes clear that we do not have a good list.
- my $available_space = 0;
- my $align_paren = 0;
- my $excess = 0;
+ # Note that weld_nested_containers() changes the _LEVEL_ values, so
+ # weld_cuddled_blocks must use the _TRUE_LEVEL_ values instead.
- # initialization on empty stack..
- if ( $max_gnu_stack_index == 0 ) {
- $space_count = $level * $rOpts_indent_columns;
- }
+ # Here is a good test case to Be sure that both cuddling and welding
+ # are working and not interfering with each other: <<snippets/ce_wn1.in>>
- # if this is a BLOCK, add the standard increment
- elsif ($last_nonblank_block_type) {
- $space_count += $standard_increment;
- }
+ # perltidy -wn -ce
- # if last nonblank token was not structural indentation,
- # just use standard increment
- elsif ( $last_nonblank_type ne '{' ) {
- $space_count += $standard_increment;
- }
+ # if ($BOLD_MATH) { (
+ # $labels, $comment,
+ # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
+ # ) } else { (
+ # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
+ # $after
+ # ) }
- # otherwise use the space to the first non-blank level change token
- else {
+ $self->weld_cuddled_blocks();
- $space_count = $gnu_position_predictor;
+ return;
+}
- my $min_gnu_indentation =
- $gnu_stack[$max_gnu_stack_index]->get_spaces();
+sub cumulative_length_before_K {
+ my ( $self, $KK ) = @_;
+ my $rLL = $self->[_rLL_];
+ return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+}
- $available_space = $space_count - $min_gnu_indentation;
- if ( $available_space >= $standard_increment ) {
- $min_gnu_indentation += $standard_increment;
- }
- elsif ( $available_space > 1 ) {
- $min_gnu_indentation += $available_space + 1;
- }
- elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
- if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
- $min_gnu_indentation += 2;
- }
- else {
- $min_gnu_indentation += 1;
- }
- }
- else {
- $min_gnu_indentation += $standard_increment;
- }
- $available_space = $space_count - $min_gnu_indentation;
+sub cumulative_length_after_K {
+ my ( $self, $KK ) = @_;
+ my $rLL = $self->[_rLL_];
+ return $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
+}
- if ( $available_space < 0 ) {
- $space_count = $min_gnu_indentation;
- $available_space = 0;
- }
- $align_paren = 1;
- }
+sub weld_cuddled_blocks {
+ my ($self) = @_;
- # update state, but not on a blank token
- if ( $types_to_go[$max_index_to_go] ne 'b' ) {
+ # Called once per file to handle cuddled formatting
- $gnu_stack[$max_gnu_stack_index]->set_have_child(1);
+ my $rweld_len_right_closing = $self->[_rweld_len_right_closing_];
- ++$max_gnu_stack_index;
- $gnu_stack[$max_gnu_stack_index] =
- new_lp_indentation_item( $space_count, $level, $ci_level,
- $available_space, $align_paren );
+ # This routine implements the -cb flag by finding the appropriate
+ # closing and opening block braces and welding them together.
+ return unless ( %{$rcuddled_block_types} );
- # If the opening paren is beyond the half-line length, then
- # we will use the minimum (standard) indentation. This will
- # help avoid problems associated with running out of space
- # near the end of a line. As a result, in deeply nested
- # lists, there will be some indentations which are limited
- # to this minimum standard indentation. But the most deeply
- # nested container will still probably be able to shift its
- # parameters to the right for proper alignment, so in most
- # cases this will not be noticeable.
- if ( $available_space > 0 && $space_count > $halfway ) {
- $gnu_stack[$max_gnu_stack_index]
- ->tentatively_decrease_available_spaces($available_space);
- }
- }
- }
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+ my $rbreak_container = $self->[_rbreak_container_];
- # Count commas and look for non-list characters. Once we see a
- # non-list character, we give up and don't look for any more commas.
- if ( $type eq '=>' ) {
- $gnu_arrow_count{$total_depth}++;
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
- # tentatively treating '=>' like '=' for estimating breaks
- # TODO: this could use some experimentation
- $last_gnu_equals{$total_depth} = $max_index_to_go;
- }
+ my $length_to_opening_seqno = sub {
+ my ($seqno) = @_;
+ my $KK = $K_opening_container->{$seqno};
+ my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+ return $lentot;
+ };
+ my $length_to_closing_seqno = sub {
+ my ($seqno) = @_;
+ my $KK = $K_closing_container->{$seqno};
+ my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+ return $lentot;
+ };
- elsif ( $type eq ',' ) {
- $gnu_comma_count{$total_depth}++;
- }
+ my $is_broken_block = sub {
- elsif ( $is_assignment{$type} ) {
- $last_gnu_equals{$total_depth} = $max_index_to_go;
- }
+ # a block is broken if the input line numbers of the braces differ
+ # we can only cuddle between broken blocks
+ my ($seqno) = @_;
+ my $K_opening = $K_opening_container->{$seqno};
+ return unless ( defined($K_opening) );
+ my $K_closing = $K_closing_container->{$seqno};
+ return unless ( defined($K_closing) );
+ return $rbreak_container->{$seqno}
+ || $rLL->[$K_closing]->[_LINE_INDEX_] !=
+ $rLL->[$K_opening]->[_LINE_INDEX_];
+ };
- # this token might start a new line
- # if this is a non-blank..
- if ( $type ne 'b' ) {
+ # A stack to remember open chains at all levels: This is a hash rather than
+ # an array for safety because negative levels can occur in files with
+ # errors. This allows us to keep processing with negative levels.
+ # $in_chain{$level} = [$chain_type, $type_sequence];
+ my %in_chain;
+ my $CBO = $rOpts->{'cuddled-break-option'};
- # and if ..
- if (
+ # loop over structure items to find cuddled pairs
+ my $level = 0;
+ my $KNEXT = 0;
+ while ( defined($KNEXT) ) {
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ if ( !$type_sequence ) {
+ next if ( $KK == 0 ); # first token in file may not be container
+ Fault("sequence = $type_sequence not defined at K=$KK");
+ }
- # this is the first nonblank token of the line
- $max_index_to_go == 1 && $types_to_go[0] eq 'b'
+ # We use the original levels because they get changed by sub
+ # 'weld_nested_containers'. So if this were to be called before that
+ # routine, the levels would be wrong and things would go bad.
+ my $last_level = $level;
+ $level = $rtoken_vars->[_LEVEL_TRUE_];
- # or previous character was one of these:
- || $last_nonblank_type_in_batch =~ /^([\:\?\,f])$/
+ if ( $level < $last_level ) { $in_chain{$last_level} = undef }
+ elsif ( $level > $last_level ) { $in_chain{$level} = undef }
- # or previous character was opening and this does not close it
- || ( $last_nonblank_type_in_batch eq '{' && $type ne '}' )
- || ( $last_nonblank_type_in_batch eq '(' and $type ne ')' )
+ # We are only looking at code blocks
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $type = $rtoken_vars->[_TYPE_];
+ next unless ( $type eq $token );
- # or this token is one of these:
- || $type =~ /^([\.]|\|\||\&\&)$/
+ if ( $token eq '{' ) {
- # or this is a closing structure
- || ( $last_nonblank_type_in_batch eq '}'
- && $last_nonblank_token_in_batch eq
- $last_nonblank_type_in_batch )
+ my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
+ if ( !$block_type ) {
- # or previous token was keyword 'return'
- || (
- $last_nonblank_type_in_batch eq 'k'
- && ( $last_nonblank_token_in_batch eq 'return'
- && $type ne '{' )
- )
+ # patch for unrecognized block types which may not be labeled
+ my $Kp = $self->K_previous_nonblank($KK);
+ while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
+ $Kp = $self->K_previous_nonblank($Kp);
+ }
+ next unless $Kp;
+ $block_type = $rLL->[$Kp]->[_TOKEN_];
- # or starting a new line at certain keywords is fine
- || ( $type eq 'k'
- && $is_if_unless_and_or_last_next_redo_return{$token} )
+ }
+ if ( $in_chain{$level} ) {
- # or this is after an assignment after a closing structure
- || (
- $is_assignment{$last_nonblank_type_in_batch}
- && (
- $last_last_nonblank_type_in_batch =~ /^[\}\)\]]$/
+ # we are in a chain and are at an opening block brace.
+ # See if we are welding this opening brace with the previous
+ # block brace. Get their identification numbers:
+ my $closing_seqno = $in_chain{$level}->[1];
+ my $opening_seqno = $type_sequence;
- # and it is significantly to the right
- || $gnu_position_predictor > $halfway
- )
- )
- )
- {
- check_for_long_gnu_style_lines($max_index_to_go);
- $line_start_index_to_go = $max_index_to_go;
+ # The preceding block must be on multiple lines so that its
+ # closing brace will start a new line.
+ if ( !$is_broken_block->($closing_seqno) ) {
+ next unless ( $CBO == 2 );
+ $rbreak_container->{$closing_seqno} = 1;
+ }
- # back up 1 token if we want to break before that type
- # otherwise, we may strand tokens like '?' or ':' on a line
- if ( $line_start_index_to_go > 0 ) {
- if ( $last_nonblank_type_in_batch eq 'k' ) {
+ # we will let the trailing block be either broken or intact
+ ## && $is_broken_block->($opening_seqno);
- if ( $want_break_before{$last_nonblank_token_in_batch} )
- {
- $line_start_index_to_go--;
- }
- }
- elsif ( $want_break_before{$last_nonblank_type_in_batch} ) {
- $line_start_index_to_go--;
- }
+ # We can weld the closing brace to its following word ..
+ my $Ko = $K_closing_container->{$closing_seqno};
+ my $Kon;
+ if ( defined($Ko) ) {
+ $Kon = $self->K_next_nonblank($Ko);
+ }
+
+ # ..unless it is a comment
+ if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) {
+ my $dlen =
+ $rLL->[$Kon]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[ $Ko - 1 ]->[_CUMULATIVE_LENGTH_];
+ $rweld_len_right_closing->{$closing_seqno} = $dlen;
+
+ # Set flag that we want to break the next container
+ # so that the cuddled line is balanced.
+ $rbreak_container->{$opening_seqno} = 1
+ if ($CBO);
}
+
}
- }
+ else {
- # remember the predicted position of this token on the output line
- if ( $max_index_to_go > $line_start_index_to_go ) {
- $gnu_position_predictor =
- total_line_length( $line_start_index_to_go, $max_index_to_go );
- }
- else {
- $gnu_position_predictor =
- $space_count + $token_lengths_to_go[$max_index_to_go];
+ # We are not in a chain. Start a new chain if we see the
+ # starting block type.
+ if ( $rcuddled_block_types->{$block_type} ) {
+ $in_chain{$level} = [ $block_type, $type_sequence ];
+ }
+ else {
+ $block_type = '*';
+ $in_chain{$level} = [ $block_type, $type_sequence ];
+ }
+ }
}
+ elsif ( $token eq '}' ) {
+ if ( $in_chain{$level} ) {
- # store the indentation object for this token
- # this allows us to manipulate the leading whitespace
- # (in case we have to reduce indentation to fit a line) without
- # having to change any token values
- $leading_spaces_to_go[$max_index_to_go] =
- $gnu_stack[$max_gnu_stack_index];
- $reduced_spaces_to_go[$max_index_to_go] =
- ( $max_gnu_stack_index > 0 && $ci_level )
- ? $gnu_stack[ $max_gnu_stack_index - 1 ]
- : $gnu_stack[$max_gnu_stack_index];
- return;
- }
-
- sub check_for_long_gnu_style_lines {
+ # We are in a chain at a closing brace. See if this chain
+ # continues..
+ my $Knn = $self->K_next_code($KK);
+ next unless $Knn;
- # look at the current estimated maximum line length, and
- # remove some whitespace if it exceeds the desired maximum
- my ($mx_index_to_go) = @_;
+ my $chain_type = $in_chain{$level}->[0];
+ my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
+ if (
+ $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
+ )
+ {
- # this is only for the '-lp' style
- return unless ($rOpts_line_up_parentheses);
+ # Note that we do not weld yet because we must wait until
+ # we we are sure that an opening brace for this follows.
+ $in_chain{$level}->[1] = $type_sequence;
+ }
+ else { $in_chain{$level} = undef }
+ }
+ }
+ }
+ return;
+}
- # nothing can be done if no stack items defined for this line
- return if ( $max_gnu_item_index == UNDEFINED_INDEX );
+sub weld_nested_containers {
+ my ($self) = @_;
- # see if we have exceeded the maximum desired line length
- # keep 2 extra free because they are needed in some cases
- # (result of trial-and-error testing)
- my $spaces_needed =
- $gnu_position_predictor - maximum_line_length($mx_index_to_go) + 2;
+ # Called once per file for option '--weld-nested-containers'
- return if ( $spaces_needed <= 0 );
+ my $rweld_len_left_closing = $self->[_rweld_len_left_closing_];
+ my $rweld_len_left_opening = $self->[_rweld_len_left_opening_];
+ my $rweld_len_right_closing = $self->[_rweld_len_right_closing_];
+ my $rweld_len_right_opening = $self->[_rweld_len_right_opening_];
- # We are over the limit, so try to remove a requested number of
- # spaces from leading whitespace. We are only allowed to remove
- # from whitespace items created on this batch, since others have
- # already been used and cannot be undone.
- my @candidates = ();
- my $i;
+ # This routine implements the -wn flag by "welding together"
+ # the nested closing and opening tokens which were previously
+ # identified by sub 'find_nested_pairs'. "welding" simply
+ # involves setting certain hash values which will be checked
+ # later during formatting.
- # loop over all whitespace items created for the current batch
- for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
- my $item = $gnu_item_list[$i];
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->get_rLL_max_index();
+ my $rnested_pairs = $self->[_rnested_pairs_];
+ my $rlines = $self->[_rlines_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
- # item must still be open to be a candidate (otherwise it
- # cannot influence the current token)
- next if ( $item->get_closed() >= 0 );
+ # Return unless there are nested pairs to weld
+ return unless defined($rnested_pairs) && @{$rnested_pairs};
- my $available_spaces = $item->get_available_spaces();
+ # This array will hold the sequence numbers of the tokens to be welded.
+ my @welds;
- if ( $available_spaces > 0 ) {
- push( @candidates, [ $i, $available_spaces ] );
- }
- }
+ # Variables needed for estimating line lengths
+ my $starting_indent;
+ my $starting_lentot;
- return unless (@candidates);
+ # A tolerance to the length for length estimates. In some rare cases
+ # this can avoid problems where a final weld slightly exceeds the
+ # line length and gets broken in a bad spot.
+ my $length_tol = 1;
- # sort by available whitespace so that we can remove whitespace
- # from the maximum available first
- @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
+ my $excess_length_to_K = sub {
+ my ($K) = @_;
- # keep removing whitespace until we are done or have no more
- foreach my $candidate (@candidates) {
- my ( $i, $available_spaces ) = @{$candidate};
- my $deleted_spaces =
- ( $available_spaces > $spaces_needed )
- ? $spaces_needed
- : $available_spaces;
+ # Estimate the length from the line start to a given token
+ my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
+ my $excess_length =
+ $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
+ return ($excess_length);
+ };
- # remove the incremental space from this item
- $gnu_item_list[$i]->decrease_available_spaces($deleted_spaces);
+ my $length_to_opening_seqno = sub {
+ my ($seqno) = @_;
+ my $KK = $K_opening_container->{$seqno};
+ my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+ return $lentot;
+ };
- my $i_debug = $i;
+ my $length_to_closing_seqno = sub {
+ my ($seqno) = @_;
+ my $KK = $K_closing_container->{$seqno};
+ my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+ return $lentot;
+ };
- # update the leading whitespace of this item and all items
- # that came after it
- for ( ; $i <= $max_gnu_item_index ; $i++ ) {
+ # Abbreviations:
+ # _oo=outer opening, i.e. first of { {
+ # _io=inner opening, i.e. second of { {
+ # _oc=outer closing, i.e. second of } {
+ # _ic=inner closing, i.e. first of } }
- my $old_spaces = $gnu_item_list[$i]->get_spaces();
- if ( $old_spaces >= $deleted_spaces ) {
- $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
- }
+ my $previous_pair;
- # shouldn't happen except for code bug:
- else {
- my $level = $gnu_item_list[$i_debug]->get_level();
- my $ci_level = $gnu_item_list[$i_debug]->get_ci_level();
- my $old_level = $gnu_item_list[$i]->get_level();
- my $old_ci_level = $gnu_item_list[$i]->get_ci_level();
- warning(
-"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
- );
- report_definite_bug();
- }
- }
- $gnu_position_predictor -= $deleted_spaces;
- $spaces_needed -= $deleted_spaces;
- last unless ( $spaces_needed > 0 );
- }
- return;
- }
+ # We are working from outermost to innermost pairs so that
+ # level changes will be complete when we arrive at the inner pairs.
- sub finish_lp_batch {
+ while ( my $item = pop( @{$rnested_pairs} ) ) {
+ my ( $inner_seqno, $outer_seqno ) = @{$item};
- # This routine is called once after each output stream batch is
- # finished to undo indentation for all incomplete -lp
- # indentation levels. It is too risky to leave a level open,
- # because then we can't backtrack in case of a long line to follow.
- # This means that comments and blank lines will disrupt this
- # indentation style. But the vertical aligner may be able to
- # get the space back if there are side comments.
+ my $Kouter_opening = $K_opening_container->{$outer_seqno};
+ my $Kinner_opening = $K_opening_container->{$inner_seqno};
+ my $Kouter_closing = $K_closing_container->{$outer_seqno};
+ my $Kinner_closing = $K_closing_container->{$inner_seqno};
- # this is only for the 'lp' style
- return unless ($rOpts_line_up_parentheses);
+ my $outer_opening = $rLL->[$Kouter_opening];
+ my $inner_opening = $rLL->[$Kinner_opening];
+ my $outer_closing = $rLL->[$Kouter_closing];
+ my $inner_closing = $rLL->[$Kinner_closing];
- # nothing can be done if no stack items defined for this line
- return if ( $max_gnu_item_index == UNDEFINED_INDEX );
+ my $iline_oo = $outer_opening->[_LINE_INDEX_];
+ my $iline_io = $inner_opening->[_LINE_INDEX_];
+ my $iline_ic = $inner_closing->[_LINE_INDEX_];
- # loop over all whitespace items created for the current batch
- foreach my $i ( 0 .. $max_gnu_item_index ) {
- my $item = $gnu_item_list[$i];
+ # Set flag saying if this pair starts a new weld
+ my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
- # only look for open items
- next if ( $item->get_closed() >= 0 );
+ # Set flag saying if this pair is adjacent to the previous nesting pair
+ # (even if previous pair was rejected as a weld)
+ my $touch_previous_pair =
+ defined($previous_pair) && $outer_seqno == $previous_pair->[0];
+ $previous_pair = $item;
- # Tentatively remove all of the available space
- # (The vertical aligner will try to get it back later)
- my $available_spaces = $item->get_available_spaces();
- if ( $available_spaces > 0 ) {
+ # Set a flag if we should not weld. It sometimes looks best not to weld
+ # when the opening and closing tokens are very close. However, there
+ # is a danger that we will create a "blinker", which oscillates between
+ # two semi-stable states, if we do not weld. So the rules for
+ # not welding have to be carefully defined and tested.
+ my $do_not_weld;
+ if ( !$touch_previous_pair ) {
- # delete incremental space for this item
- $gnu_item_list[$i]
- ->tentatively_decrease_available_spaces($available_spaces);
+ # If this pair is not adjacent to the previous pair (skipped or
+ # not), then measure lengths from the start of line of oo
- # Reduce the total indentation space of any nodes that follow
- # Note that any such nodes must necessarily be dependents
- # of this node.
- foreach ( $i + 1 .. $max_gnu_item_index ) {
- $gnu_item_list[$_]->decrease_SPACES($available_spaces);
- }
+ my $rK_range = $rlines->[$iline_oo]->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ $starting_lentot =
+ $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
+ $starting_indent = 0;
+ if ( !$rOpts_variable_maximum_line_length ) {
+ my $level = $rLL->[$Kfirst]->[_LEVEL_];
+ $starting_indent = $rOpts_indent_columns * $level;
}
- }
- return;
- }
-} ## end closure set_leading_whitespace
-sub reduce_lp_indentation {
+ # DO-NOT-WELD RULE 1:
+ # Do not weld something that looks like the start of a two-line
+ # function call, like this: <<snippets/wn6.in>>
+ # $trans->add_transformation(
+ # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
+ # We will look for a semicolon after the closing paren.
- # reduce the leading whitespace at token $i if possible by $spaces_needed
- # (a large value of $spaces_needed will remove all excess space)
- # NOTE: to be called from scan_list only for a sequence of tokens
- # contained between opening and closing parens/braces/brackets
+ # We want to weld something complex, like this though
+ # my $compass = uc( opposite_direction( line_to_canvas_direction(
+ # @{ $coords[0] }, @{ $coords[1] } ) ) );
+ # Otherwise we will get a 'blinker'. For example, the following
+ # would become a blinker without this rule:
+ # $Self->_Add( $SortOrderDisplay{ $Field
+ # ->GenerateFieldForSelectSQL() } );
+ # But it is okay to weld a two-line statement if it looks like
+ # it was already welded, meaning that the two opening containers are
+ # on a different line that the two closing containers. This is
+ # necessary to prevent blinking of something like this with
+ # perltidy -wn -pbp (starting indentation two levels deep):
- my ( $self, $i, $spaces_wanted ) = @_;
- my $deleted_spaces = 0;
+ # $top_label->set_text( gettext(
+ # "Unable to create personal directory - check permissions.") );
- my $item = $leading_spaces_to_go[$i];
- my $available_spaces = $item->get_available_spaces();
+ my $iline_oc = $outer_closing->[_LINE_INDEX_];
+ my $token_oo = $outer_opening->[_TOKEN_];
+ if ( $iline_oc == $iline_oo + 1
+ && $iline_io == $iline_ic
+ && $token_oo eq '(' )
+ {
- if (
- $available_spaces > 0
- && ( ( $spaces_wanted <= $available_spaces )
- || !$item->get_have_child() )
- )
- {
+ # Look for following semicolon...
+ my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
+ my $next_nonblank_type =
+ defined($Knext_nonblank)
+ ? $rLL->[$Knext_nonblank]->[_TYPE_]
+ : 'b';
+ if ( $next_nonblank_type eq ';' ) {
- # we'll remove these spaces, but mark them as recoverable
- $deleted_spaces =
- $item->tentatively_decrease_available_spaces($spaces_wanted);
- }
+ # Then do not weld if no other containers between inner
+ # opening and closing.
+ my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
+ if ( $Knext_seq_item == $Kinner_closing ) {
+ $do_not_weld ||= 1;
+ }
+ }
+ }
+ }
- return $deleted_spaces;
-}
+ # DO-NOT-WELD RULE 2:
+ # Do not weld an opening paren to an inner one line brace block
+ # We will just use old line numbers for this test and require
+ # iterations if necessary for convergence
-sub total_line_length {
+ # For example, otherwise we could cause the opening paren
+ # in the following example to separate from the caller name
+ # as here:
- # return length of a line of tokens ($ibeg .. $iend)
- my ( $ibeg, $iend ) = @_;
- return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
-}
+ # $_[0]->code_handler
+ # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
-sub maximum_line_length_for_level {
+ # Here is another example where we do not want to weld:
+ # $wrapped->add_around_modifier(
+ # sub { push @tracelog => 'around 1'; $_[0]->(); } );
- # return maximum line length for line starting with a given level
- my $maximum_line_length = $rOpts_maximum_line_length;
+ # If the one line sub block gets broken due to length or by the
+ # user, then we can weld. The result will then be:
+ # $wrapped->add_around_modifier( sub {
+ # push @tracelog => 'around 1';
+ # $_[0]->();
+ # } );
- # Modify if -vmll option is selected
- if ($rOpts_variable_maximum_line_length) {
- my $level = shift;
- if ( $level < 0 ) { $level = 0 }
- $maximum_line_length += $level * $rOpts_indent_columns;
- }
- return $maximum_line_length;
-}
+ if ( $iline_ic == $iline_io ) {
-sub excess_line_length {
+ my $token_oo = $outer_opening->[_TOKEN_];
+ $do_not_weld ||= $token_oo eq '(';
+ }
- # return number of characters by which a line of tokens ($ibeg..$iend)
- # exceeds the allowable line length.
- my ( $self, $ibeg, $iend, $ignore_left_weld, $ignore_right_weld ) = @_;
+ # DO-NOT-WELD RULE 3:
+ # Do not weld if this makes our line too long
+ $do_not_weld ||= $excess_length_to_K->($Kinner_opening) > 0;
- # Include left and right weld lengths unless requested not to
- my $wl = $ignore_left_weld ? 0 : $self->weld_len_left_to_go($iend);
- my $wr = $ignore_right_weld ? 0 : $self->weld_len_right_to_go($iend);
+ # DO-NOT-WELD RULE 4; implemented for git#10:
+ # Do not weld an opening -ce brace if the next container is on a single
+ # line, different from the opening brace. (This is very rare). For
+ # example, given the following with -ce, we will avoid joining the {
+ # and [
- return total_line_length( $ibeg, $iend ) + $wl + $wr -
- maximum_line_length($ibeg);
-}
+ # } else {
+ # [ $_, length($_) ]
+ # }
-sub wrapup {
+ # because this would produce a terminal one-line block:
- # This is the last routine called when a file is formatted.
- # Flush buffer and write any informative messages
- my $self = shift;
+ # } else { [ $_, length($_) ] }
- $self->flush();
- my $file_writer_object = $self->[_file_writer_object_];
- $file_writer_object->decrement_output_line_number()
- ; # fix up line number since it was incremented
- we_are_at_the_last_line();
- my $added_semicolon_count = $self->[_added_semicolon_count_];
- my $first_added_semicolon_at = $self->[_first_added_semicolon_at_];
- my $last_added_semicolon_at = $self->[_last_added_semicolon_at_];
+ # which may not be what is desired. But given this input:
- if ( $added_semicolon_count > 0 ) {
- my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
- my $what =
- ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
- write_logfile_entry("$added_semicolon_count $what added:\n");
- write_logfile_entry(
- " $first at input line $first_added_semicolon_at\n");
+ # } else { [ $_, length($_) ] }
- if ( $added_semicolon_count > 1 ) {
- write_logfile_entry(
- " Last at input line $last_added_semicolon_at\n");
+ # then we will do the weld and retain the one-line block
+ if ( $rOpts->{'cuddled-else'} ) {
+ my $block_type = $rLL->[$Kouter_opening]->[_BLOCK_TYPE_];
+ if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
+ my $io_line = $inner_opening->[_LINE_INDEX_];
+ my $ic_line = $inner_closing->[_LINE_INDEX_];
+ my $oo_line = $outer_opening->[_LINE_INDEX_];
+ $do_not_weld ||=
+ ( $oo_line < $io_line && $ic_line == $io_line );
+ }
}
- write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
- write_logfile_entry("\n");
- }
- my $deleted_semicolon_count = $self->[_deleted_semicolon_count_];
- my $first_deleted_semicolon_at = $self->[_first_deleted_semicolon_at_];
- my $last_deleted_semicolon_at = $self->[_last_deleted_semicolon_at_];
- if ( $deleted_semicolon_count > 0 ) {
- my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
- my $what =
- ( $deleted_semicolon_count > 1 )
- ? "semicolons were"
- : "semicolon was";
- write_logfile_entry(
- "$deleted_semicolon_count unnecessary $what deleted:\n");
- write_logfile_entry(
- " $first at input line $first_deleted_semicolon_at\n");
+ if ($do_not_weld) {
- if ( $deleted_semicolon_count > 1 ) {
- write_logfile_entry(
- " Last at input line $last_deleted_semicolon_at\n");
+ # After neglecting a pair, we start measuring from start of point io
+ $starting_lentot =
+ $self->cumulative_length_before_K($Kinner_opening);
+ $starting_indent = 0;
+ if ( !$rOpts_variable_maximum_line_length ) {
+ my $level = $inner_opening->[_LEVEL_];
+ $starting_indent = $rOpts_indent_columns * $level;
+ }
+
+ # Normally, a broken pair should not decrease indentation of
+ # intermediate tokens:
+ ## if ( $last_pair_broken ) { next }
+ # However, for long strings of welded tokens, such as '{{{{{{...'
+ # we will allow broken pairs to also remove indentation.
+ # This will keep very long strings of opening and closing
+ # braces from marching off to the right. We will do this if the
+ # number of tokens in a weld before the broken weld is 4 or more.
+ # This rule will mainly be needed for test scripts, since typical
+ # welds have fewer than about 4 welded tokens.
+ if ( !@welds || @{ $welds[-1] } < 4 ) { next }
}
- write_logfile_entry(" (Use -ndsm to prevent semicolon deletion)\n");
- write_logfile_entry("\n");
- }
- my $embedded_tab_count = $self->[_embedded_tab_count_];
- my $first_embedded_tab_at = $self->[_first_embedded_tab_at_];
- my $last_embedded_tab_at = $self->[_last_embedded_tab_at_];
- if ( $embedded_tab_count > 0 ) {
- my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
- my $what =
- ( $embedded_tab_count > 1 )
- ? "quotes or patterns"
- : "quote or pattern";
- write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
- write_logfile_entry(
-"This means the display of this script could vary with device or software\n"
- );
- write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
+ # otherwise start new weld ...
+ elsif ($starting_new_weld) {
+ push @welds, $item;
+ }
- if ( $embedded_tab_count > 1 ) {
- write_logfile_entry(
- " Last at input line $last_embedded_tab_at\n");
+ # ... or extend current weld
+ else {
+ unshift @{ $welds[-1] }, $inner_seqno;
+ }
+
+ # After welding, reduce the indentation level if all intermediate tokens
+ my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
+ if ( $dlevel != 0 ) {
+ my $Kstart = $Kinner_opening;
+ my $Kstop = $Kinner_closing;
+ for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) {
+ $rLL->[$KK]->[_LEVEL_] += $dlevel;
+ }
}
- write_logfile_entry("\n");
}
- my $first_tabbing_disagreement = $self->[_first_tabbing_disagreement_];
- my $last_tabbing_disagreement = $self->[_last_tabbing_disagreement_];
- my $tabbing_disagreement_count = $self->[_tabbing_disagreement_count_];
- my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
+ # Define weld lengths needed later to set line breaks
+ foreach my $item (@welds) {
- if ($first_tabbing_disagreement) {
- write_logfile_entry(
-"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
- );
- }
+ # sweep from inner to outer
- my $first_btd = $self->[_first_brace_tabbing_disagreement_];
- if ($first_btd) {
- my $msg =
-"First closing brace indentation disagreement started at input line $first_btd\n";
- write_logfile_entry($msg);
+ my $inner_seqno;
+ my $len_close = 0;
+ my $len_open = 0;
+ foreach my $outer_seqno ( @{$item} ) {
+ if ($inner_seqno) {
- # leave a hint in the .ERR file if there was a brace error
- if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
- }
+ my $dlen_opening =
+ $length_to_opening_seqno->($inner_seqno) -
+ $length_to_opening_seqno->($outer_seqno);
- my $in_btd = $self->[_in_brace_tabbing_disagreement_];
- if ($in_btd) {
- my $msg =
-"Ending with brace indentation disagreement which started at input line $in_btd\n";
- write_logfile_entry($msg);
+ my $dlen_closing =
+ $length_to_closing_seqno->($outer_seqno) -
+ $length_to_closing_seqno->($inner_seqno);
- # leave a hint in the .ERR file if there was a brace error
- if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
- }
+ $len_open += $dlen_opening;
+ $len_close += $dlen_closing;
- if ($in_tabbing_disagreement) {
- my $msg =
-"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n";
- write_logfile_entry($msg);
- }
- else {
+ }
- if ($last_tabbing_disagreement) {
+ $rweld_len_left_closing->{$outer_seqno} = $len_close;
+ $rweld_len_right_opening->{$outer_seqno} = $len_open;
- write_logfile_entry(
-"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
- );
- }
- else {
- write_logfile_entry("No indentation disagreement seen\n");
+ $inner_seqno = $outer_seqno;
}
- }
- if ($first_tabbing_disagreement) {
- write_logfile_entry(
-"Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
- );
+ # sweep from outer to inner
+ foreach my $seqno ( reverse @{$item} ) {
+ $rweld_len_right_closing->{$seqno} =
+ $len_close - $rweld_len_left_closing->{$seqno};
+ $rweld_len_left_opening->{$seqno} =
+ $len_open - $rweld_len_right_opening->{$seqno};
+ }
}
- write_logfile_entry("\n");
-
- my $vao = $self->[_vertical_aligner_object_];
- $vao->report_anything_unusual();
- $file_writer_object->report_line_length_errors();
+ #####################################
+ # DEBUG
+ #####################################
+ if (0) {
+ my $count = 0;
+ local $" = ')(';
+ foreach my $weld (@welds) {
+ print "\nWeld number $count has seq: (@{$weld})\n";
+ foreach my $seq ( @{$weld} ) {
+ print <<EOM;
+ seq=$seq
+ left_opening=$rweld_len_left_opening->{$seq};
+ right_opening=$rweld_len_right_opening->{$seq};
+ left_closing=$rweld_len_left_closing->{$seq};
+ right_closing=$rweld_len_right_closing->{$seq};
+EOM
+ }
+ $count++;
+ }
+ }
return;
}
-sub check_options {
-
- # This routine is called to check the user-supplied run parameters
- # and to configure the control hashes to them.
- $rOpts = shift;
+sub weld_nested_quotes {
- initialize_whitespace_hashes();
- initialize_bond_strength_hashes();
+ # Called once per file for option '--weld-nested-containers'. This
+ # does welding on qw quotes.
- # Make needed regex patterns for matching text.
- # NOTE: sub_matching_patterns must be made first because later patterns use
- # them; see RT #133130.
- make_sub_matching_pattern();
- make_static_block_comment_pattern();
- make_static_side_comment_pattern();
- make_closing_side_comment_prefix();
- make_closing_side_comment_list_pattern();
- $format_skipping_pattern_begin =
- make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
- $format_skipping_pattern_end =
- make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
- make_non_indenting_brace_pattern();
+ my $self = shift;
- # If closing side comments ARE selected, then we can safely
- # delete old closing side comments unless closing side comment
- # warnings are requested. This is a good idea because it will
- # eliminate any old csc's which fall below the line count threshold.
- # We cannot do this if warnings are turned on, though, because we
- # might delete some text which has been added. So that must
- # be handled when comments are created.
- if ( $rOpts->{'closing-side-comments'} ) {
- if ( !$rOpts->{'closing-side-comment-warnings'} ) {
- $rOpts->{'delete-closing-side-comments'} = 1;
- }
- }
+ my $rweld_len_left_closing = $self->[_rweld_len_left_closing_];
+ my $rweld_len_right_opening = $self->[_rweld_len_right_opening_];
- # If closing side comments ARE NOT selected, but warnings ARE
- # selected and we ARE DELETING csc's, then we will pretend to be
- # adding with a huge interval. This will force the comments to be
- # generated for comparison with the old comments, but not added.
- elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
- if ( $rOpts->{'delete-closing-side-comments'} ) {
- $rOpts->{'delete-closing-side-comments'} = 0;
- $rOpts->{'closing-side-comments'} = 1;
- $rOpts->{'closing-side-comment-interval'} = 100000000;
- }
- }
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
- make_bli_pattern();
- make_block_brace_vertical_tightness_pattern();
- make_blank_line_pattern();
- make_keyword_group_list_pattern();
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rlines = $self->[_rlines_];
- # Make initial list of desired one line block types
- # They will be modified by 'prepare_cuddled_block_types'
- %want_one_line_block = %is_sort_map_grep_eval;
+ my $rOpts_variable_maximum_line_length =
+ $rOpts->{'variable-maximum-line-length'};
- prepare_cuddled_block_types();
- if ( $rOpts->{'dump-cuddled-block-list'} ) {
- dump_cuddled_block_list(*STDOUT);
- Exit(0);
- }
+ my $is_single_quote = sub {
+ my ( $Kbeg, $Kend, $quote_type ) = @_;
+ foreach my $K ( $Kbeg .. $Kend ) {
+ my $test_type = $rLL->[$K]->[_TYPE_];
+ next if ( $test_type eq 'b' );
+ return if ( $test_type ne $quote_type );
+ }
+ return 1;
+ };
- if ( $rOpts->{'line-up-parentheses'} ) {
+ my $excess_line_length_K = sub {
+ my ( $KK, $Ktest ) = @_;
- if ( $rOpts->{'indent-only'}
- || !$rOpts->{'add-newlines'}
- || !$rOpts->{'delete-old-newlines'} )
- {
- Warn(<<EOM);
------------------------------------------------------------------------
-Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
-
-The -lp indentation logic requires that perltidy be able to coordinate
-arbitrarily large numbers of line breakpoints. This isn't possible
-with these flags.
------------------------------------------------------------------------
-EOM
- $rOpts->{'line-up-parentheses'} = 0;
+ # what is the excess length if we add token $Ktest to the line with $KK?
+ my $iline = $rLL->[$KK]->[_LINE_INDEX_];
+ my $rK_range = $rlines->[$iline]->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ my $starting_lentot =
+ $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
+ my $starting_indent = 0;
+ my $length_tol = 1;
+ if ( !$rOpts_variable_maximum_line_length ) {
+ my $level = $rLL->[$Kfirst]->[_LEVEL_];
+ $starting_indent = $rOpts_indent_columns * $level;
}
- if ( $rOpts->{'whitespace-cycle'} ) {
- Warn(<<EOM);
-Conflict: -wc cannot currently be used with the -lp option; ignoring -wc
-EOM
- $rOpts->{'whitespace-cycle'} = 0;
+ my $length = $rLL->[$Ktest]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
+ my $excess_length =
+ $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
+ return $excess_length;
+ };
+
+ # look for single qw quotes nested in containers
+ my $KNEXT = 0;
+ while ( defined($KNEXT) ) {
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
+ if ( !$outer_seqno ) {
+ next if ( $KK == 0 ); # first token in file may not be container
+ Fault("sequence = $outer_seqno not defined at K=$KK");
}
- }
- # At present, tabs are not compatible with the line-up-parentheses style
- # (it would be possible to entab the total leading whitespace
- # just prior to writing the line, if desired).
- if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
- Warn(<<EOM);
-Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
-EOM
- $rOpts->{'tabs'} = 0;
- }
+ my $token = $rtoken_vars->[_TOKEN_];
+ if ( $is_opening_token{$token} ) {
- # Likewise, tabs are not compatible with outdenting..
- if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
- Warn(<<EOM);
-Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
-EOM
- $rOpts->{'tabs'} = 0;
- }
+ # see if the next token is a quote of some type
+ my $Kn = $self->K_next_nonblank($KK);
+ next unless $Kn;
+ my $next_token = $rLL->[$Kn]->[_TOKEN_];
+ my $next_type = $rLL->[$Kn]->[_TYPE_];
+ next
+ unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
+ && $next_token =~ /^q/ );
- if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
- Warn(<<EOM);
-Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
-EOM
- $rOpts->{'tabs'} = 0;
- }
+ # The token before the closing container must also be a quote
+ my $K_closing = $K_closing_container->{$outer_seqno};
+ my $Kt_end = $self->K_previous_nonblank($K_closing);
+ next unless $rLL->[$Kt_end]->[_TYPE_] eq $next_type;
- if ( !$rOpts->{'space-for-semicolon'} ) {
- $want_left_space{'f'} = -1;
- }
+ # Do not weld to single-line quotes. Nothing is gained, and it may
+ # look bad.
+ next if ( $Kt_end == $Kn );
- if ( $rOpts->{'space-terminal-semicolon'} ) {
- $want_left_space{';'} = 1;
- }
+ # Only weld to quotes delimited with container tokens. This is
+ # because welding to arbitrary quote delimiters can produce code
+ # which is less readable than without welding.
+ my $closing_delimiter = substr( $rLL->[$Kt_end]->[_TOKEN_], -1, 1 );
+ next
+ unless ( $is_closing_token{$closing_delimiter}
+ || $closing_delimiter eq '>' );
- # implement outdenting preferences for keywords
- %outdent_keyword = ();
- my @okw = split_words( $rOpts->{'outdent-keyword-list'} );
- unless (@okw) {
- @okw = qw(next last redo goto return); # defaults
- }
+ # Now make sure that there is just a single quote in the container
+ next
+ unless ( $is_single_quote->( $Kn + 1, $Kt_end - 1, $next_type ) );
- # FUTURE: if not a keyword, assume that it is an identifier
- foreach (@okw) {
- if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
- $outdent_keyword{$_} = 1;
- }
- else {
- Warn("ignoring '$_' in -okwl list; not a perl keyword");
- }
- }
+ # If welded, the line must not exceed allowed line length
+ # Assume old line breaks for this estimate.
+ next if ( $excess_line_length_K->( $KK, $Kn ) > 0 );
- # setup hash for -kpit option
- %keyword_paren_inner_tightness = ();
- my $kpit_value = $rOpts->{'keyword-paren-inner-tightness'};
- if ( defined($kpit_value) && $kpit_value != 1 ) {
- my @kpit =
- split_words( $rOpts->{'keyword-paren-inner-tightness-list'} );
- unless (@kpit) {
- @kpit = qw(if elsif unless while until for foreach); # defaults
- }
+ # OK to weld
+ # FIXME: Are these always correct?
+ $rweld_len_left_closing->{$outer_seqno} = 1;
+ $rweld_len_right_opening->{$outer_seqno} = 2;
- # we will allow keywords and user-defined identifiers
- foreach (@kpit) {
- $keyword_paren_inner_tightness{$_} = $kpit_value;
+ # QW PATCH 1 (Testing)
+ # undo CI for welded quotes
+ foreach my $K ( $Kn .. $Kt_end ) {
+ $rLL->[$K]->[_CI_LEVEL_] = 0;
+ }
+
+ # Change the level of a closing qw token to be that of the outer
+ # containing token. This will allow -lp indentation to function
+ # correctly in the vertical aligner.
+ $rLL->[$Kt_end]->[_LEVEL_] = $rLL->[$K_closing]->[_LEVEL_];
}
}
+ return;
+}
- # implement user whitespace preferences
- if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
- @want_left_space{@q} = (1) x scalar(@q);
- }
+sub weld_len_left {
- if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
- @want_right_space{@q} = (1) x scalar(@q);
- }
+ my ( $self, $seqno, $type_or_tok ) = @_;
- if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
- @want_left_space{@q} = (-1) x scalar(@q);
- }
+ my $rweld_len_left_closing = $self->[_rweld_len_left_closing_];
+ my $rweld_len_left_opening = $self->[_rweld_len_left_opening_];
- if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
- @want_right_space{@q} = (-1) x scalar(@q);
- }
- if ( $rOpts->{'dump-want-left-space'} ) {
- dump_want_left_space(*STDOUT);
- Exit(0);
- }
+ # Given the sequence number of a token, and the token or its type,
+ # return the length of any weld to its left
- if ( $rOpts->{'dump-want-right-space'} ) {
- dump_want_right_space(*STDOUT);
- Exit(0);
+ my $weld_len;
+ if ($seqno) {
+ if ( $is_closing_type{$type_or_tok} ) {
+ $weld_len = $rweld_len_left_closing->{$seqno};
+ }
+ elsif ( $is_opening_type{$type_or_tok} ) {
+ $weld_len = $rweld_len_left_opening->{$seqno};
+ }
}
+ if ( !defined($weld_len) ) { $weld_len = 0 }
+ return $weld_len;
+}
- # default keywords for which space is introduced before an opening paren
- # (at present, including them messes up vertical alignment)
- my @sak = qw(my local our and or xor err eq ne if else elsif until
- unless while for foreach return switch case given when catch);
- %space_after_keyword = map { $_ => 1 } @sak;
-
- # first remove any or all of these if desired
- if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
+sub weld_len_right {
- # -nsak='*' selects all the above keywords
- if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
- @space_after_keyword{@q} = (0) x scalar(@q);
- }
+ my ( $self, $seqno, $type_or_tok ) = @_;
- # then allow user to add to these defaults
- if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
- @space_after_keyword{@q} = (1) x scalar(@q);
- }
+ my $rweld_len_right_closing = $self->[_rweld_len_right_closing_];
+ my $rweld_len_right_opening = $self->[_rweld_len_right_opening_];
- # implement user break preferences
- my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
- = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
- . : ? && || and or err xor
- );
+ # Given the sequence number of a token, and the token or its type,
+ # return the length of any weld to its right
- my $break_after = sub {
- my @toks = @_;
- foreach my $tok (@toks) {
- if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
- my $lbs = $left_bond_strength{$tok};
- my $rbs = $right_bond_strength{$tok};
- if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
- ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
- ( $lbs, $rbs );
- }
+ my $weld_len;
+ if ($seqno) {
+ if ( $is_closing_type{$type_or_tok} ) {
+ $weld_len = $rweld_len_right_closing->{$seqno};
}
- };
-
- my $break_before = sub {
- my @toks = @_;
- foreach my $tok (@toks) {
- my $lbs = $left_bond_strength{$tok};
- my $rbs = $right_bond_strength{$tok};
- if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
- ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
- ( $lbs, $rbs );
- }
+ elsif ( $is_opening_type{$type_or_tok} ) {
+ $weld_len = $rweld_len_right_opening->{$seqno};
}
- };
+ }
+ if ( !defined($weld_len) ) { $weld_len = 0 }
+ return $weld_len;
+}
- $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
- $break_before->(@all_operators)
- if ( $rOpts->{'break-before-all-operators'} );
+sub weld_len_left_to_go {
+ my ( $self, $i ) = @_;
- $break_after->( split_words( $rOpts->{'want-break-after'} ) );
- $break_before->( split_words( $rOpts->{'want-break-before'} ) );
+ # Given the index of a token in the 'to_go' array
+ # return the length of any weld to its left
+ return if ( $i < 0 );
+ my $weld_len =
+ $self->weld_len_left( $type_sequence_to_go[$i], $types_to_go[$i] );
+ return $weld_len;
+}
- # make note if breaks are before certain key types
- %want_break_before = ();
- foreach my $tok ( @all_operators, ',' ) {
- $want_break_before{$tok} =
- $left_bond_strength{$tok} < $right_bond_strength{$tok};
- }
+sub weld_len_right_to_go {
+ my ( $self, $i ) = @_;
- # Coordinate ?/: breaks, which must be similar
- if ( !$want_break_before{':'} ) {
- $want_break_before{'?'} = $want_break_before{':'};
- $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
- $left_bond_strength{'?'} = NO_BREAK;
- }
+ # Given the index of a token in the 'to_go' array
+ # return the length of any weld to its right
+ return if ( $i < 0 );
+ if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
+ my $weld_len =
+ $self->weld_len_right( $type_sequence_to_go[$i], $types_to_go[$i] );
+ return $weld_len;
+}
- # Only make a hash entry for the next parameters if values are defined.
- # That allows a quick check to be made later.
- %break_before_container_types = ();
- for ( $rOpts->{'break-before-hash-brace'} ) {
- $break_before_container_types{'{'} = $_ if $_ && $_ > 0;
- }
- for ( $rOpts->{'break-before-square-bracket'} ) {
- $break_before_container_types{'['} = $_ if $_ && $_ > 0;
- }
- for ( $rOpts->{'break-before-paren'} ) {
- $break_before_container_types{'('} = $_ if $_ && $_ > 0;
- }
+sub mark_short_nested_blocks {
- %container_indentation_options = ();
- for ( $rOpts->{'break-before-hash-brace-and-indent'} ) {
- my $tok = '{';
- if ( defined($_) && $_ > 0 && $break_before_container_types{$tok} ) {
- $container_indentation_options{$tok} = $_;
- }
- }
- for ( $rOpts->{'break-before-square-bracket-and-indent'} ) {
- my $tok = '[';
- if ( defined($_) && $_ > 0 && $break_before_container_types{$tok} ) {
- $container_indentation_options{$tok} = $_;
- }
- }
- for ( $rOpts->{'break-before-paren-and-indent'} ) {
- my $tok = '(';
- if ( defined($_) && $_ > 0 && $break_before_container_types{$tok} ) {
- $container_indentation_options{$tok} = $_;
- }
- }
+ # This routine looks at the entire file and marks any short nested blocks
+ # which should not be broken. The results are stored in the hash
+ # $rshort_nested->{$type_sequence}
+ # which will be true if the container should remain intact.
+ #
+ # For example, consider the following line:
- # Define here tokens which may follow the closing brace of a do statement
- # on the same line, as in:
- # } while ( $something);
- my @dof = qw(until while unless if ; : );
- push @dof, ',';
- @is_do_follower{@dof} = (1) x scalar(@dof);
+ # sub cxt_two { sort { $a <=> $b } test_if_list() }
- # What tokens may follow the closing brace of an if or elsif block?
- # Not used. Previously used for cuddled else, but no longer needed.
- %is_if_brace_follower = ();
+ # The 'sort' block is short and nested within an outer sub block.
+ # Normally, the existance of the 'sort' block will force the sub block to
+ # break open, but this is not always desirable. Here we will set a flag for
+ # the sort block to prevent this. To give the user control, we will
+ # follow the input file formatting. If either of the blocks is broken in
+ # the input file then we will allow it to remain broken. Otherwise we will
+ # set a flag to keep it together in later formatting steps.
- # nothing can follow the closing curly of an else { } block:
- %is_else_brace_follower = ();
+ # The flag which is set here will be checked in two places:
+ # 'sub process_line_of_CODE' and 'sub starting_one_line_block'
- # what can follow a multi-line anonymous sub definition closing curly:
- my @asf = qw# ; : => or and && || ~~ !~~ ) #;
- push @asf, ',';
- @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
+ my $self = shift;
+ return if $rOpts->{'indent-only'};
- # what can follow a one-line anonymous sub closing curly:
- # one-line anonymous subs also have ']' here...
- # see tk3.t and PP.pm
- my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
- push @asf1, ',';
- @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
- # What can follow a closing curly of a block
- # which is not an if/elsif/else/do/sort/map/grep/eval/sub
- # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
- my @obf = qw# ; : => or and && || ) #;
- push @obf, ',';
- @is_other_brace_follower{@obf} = (1) x scalar(@obf);
+ return unless ( $rOpts->{'one-line-block-nesting'} );
- $right_bond_strength{'{'} = WEAK;
- $left_bond_strength{'{'} = VERY_STRONG;
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rbreak_container = $self->[_rbreak_container_];
+ my $rshort_nested = $self->[_rshort_nested_];
+ my $rlines = $self->[_rlines_];
- # make -l=0 equal to -l=infinite
- if ( !$rOpts->{'maximum-line-length'} ) {
- $rOpts->{'maximum-line-length'} = 1000000;
- }
+ # Variables needed for estimating line lengths
+ my $starting_indent;
+ my $starting_lentot;
+ my $length_tol = 1;
- # make -lbl=0 equal to -lbl=infinite
- if ( !$rOpts->{'long-block-line-count'} ) {
- $rOpts->{'long-block-line-count'} = 1000000;
- }
+ my $excess_length_to_K = sub {
+ my ($K) = @_;
- my $ole = $rOpts->{'output-line-ending'};
- if ($ole) {
- my %endings = (
- dos => "\015\012",
- win => "\015\012",
- mac => "\015",
- unix => "\012",
- );
+ # Estimate the length from the line start to a given token
+ my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
+ my $excess_length =
+ $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
+ return ($excess_length);
+ };
- # Patch for RT #99514, a memoization issue.
- # Normally, the user enters one of 'dos', 'win', etc, and we change the
- # value in the options parameter to be the corresponding line ending
- # character. But, if we are using memoization, on later passes through
- # here the option parameter will already have the desired ending
- # character rather than the keyword 'dos', 'win', etc. So
- # we must check to see if conversion has already been done and, if so,
- # bypass the conversion step.
- my %endings_inverted = (
- "\015\012" => 'dos',
- "\015\012" => 'win',
- "\015" => 'mac',
- "\012" => 'unix',
- );
+ my $is_broken_block = sub {
- if ( defined( $endings_inverted{$ole} ) ) {
+ # a block is broken if the input line numbers of the braces differ
+ my ($seqno) = @_;
+ my $K_opening = $K_opening_container->{$seqno};
+ return unless ( defined($K_opening) );
+ my $K_closing = $K_closing_container->{$seqno};
+ return unless ( defined($K_closing) );
+ return $rbreak_container->{$seqno}
+ || $rLL->[$K_closing]->[_LINE_INDEX_] !=
+ $rLL->[$K_opening]->[_LINE_INDEX_];
+ };
- # we already have valid line ending, nothing more to do
- }
- else {
- $ole = lc $ole;
- unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
- my $str = join " ", keys %endings;
- Die(<<EOM);
-Unrecognized line ending '$ole'; expecting one of: $str
-EOM
- }
- if ( $rOpts->{'preserve-line-endings'} ) {
- Warn("Ignoring -ple; conflicts with -ole\n");
- $rOpts->{'preserve-line-endings'} = undef;
- }
+ # loop over all containers
+ my @open_block_stack;
+ my $iline = -1;
+ my $KNEXT = 0;
+ my $rOpts_variable_maximum_line_length =
+ $rOpts->{'variable-maximum-line-length'};
+ while ( defined($KNEXT) ) {
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ if ( !$type_sequence ) {
+ next if ( $KK == 0 ); # first token in file may not be container
+
+ # an error here is most likely due to a recent programming change
+ Fault("sequence = $type_sequence not defined at K=$KK");
}
- }
- # hashes used to simplify setting whitespace
- %tightness = (
- '{' => $rOpts->{'brace-tightness'},
- '}' => $rOpts->{'brace-tightness'},
- '(' => $rOpts->{'paren-tightness'},
- ')' => $rOpts->{'paren-tightness'},
- '[' => $rOpts->{'square-bracket-tightness'},
- ']' => $rOpts->{'square-bracket-tightness'},
- );
- %matching_token = (
- '{' => '}',
- '(' => ')',
- '[' => ']',
- '?' => ':',
- );
+ # We are just looking at code blocks
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $type = $rtoken_vars->[_TYPE_];
+ next unless ( $type eq $token );
+ my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
+ next unless ($block_type);
- if ( $rOpts->{'ignore-old-breakpoints'} ) {
- if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
- Warn("Conflicting parameters: -iob and -bom; -bom will be ignored\n"
- );
- }
- if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
- Warn("Conflicting parameters: -iob and -boc; -boc will be ignored\n"
- );
- }
- if ( $rOpts->{'break-at-old-semicolon-breakpoints'} ) {
- Warn("Conflicting parameters: -iob and -bos; -bos will be ignored\n"
- );
- }
+ # Keep a stack of all acceptable block braces seen.
+ # Only consider blocks entirely on one line so dump the stack when line
+ # changes.
+ my $iline_last = $iline;
+ $iline = $rLL->[$KK]->[_LINE_INDEX_];
+ if ( $iline != $iline_last ) { @open_block_stack = () }
- # Note: there are additional parameters that can be made inactive by
- # -iob, but they are on by default so we would generate excessive
- # warnings if we noted them. They are:
- # $rOpts->{'break-at-old-keyword-breakpoints'}
- # $rOpts->{'break-at-old-logical-breakpoints'}
- # $rOpts->{'break-at-old-ternary-breakpoints'}
- # $rOpts->{'break-at-old-attribute-breakpoints'}
- }
+ if ( $token eq '}' ) {
+ if (@open_block_stack) { pop @open_block_stack }
+ }
+ next unless ( $token eq '{' );
- # very frequently used parameters made global for efficiency
- $rOpts_closing_side_comment_maximum_text =
- $rOpts->{'closing-side-comment-maximum-text'};
- $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
- $rOpts_indent_columns = $rOpts->{'indent-columns'};
- $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
- $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
- $rOpts_variable_maximum_line_length =
- $rOpts->{'variable-maximum-line-length'};
+ # block must be balanced (bad scripts may be unbalanced)
+ my $K_opening = $K_opening_container->{$type_sequence};
+ my $K_closing = $K_closing_container->{$type_sequence};
+ next unless ( defined($K_opening) && defined($K_closing) );
- # Note that both opening and closing tokens can access the opening
- # and closing flags of their container types.
- %opening_vertical_tightness = (
- '(' => $rOpts->{'paren-vertical-tightness'},
- '{' => $rOpts->{'brace-vertical-tightness'},
- '[' => $rOpts->{'square-bracket-vertical-tightness'},
- ')' => $rOpts->{'paren-vertical-tightness'},
- '}' => $rOpts->{'brace-vertical-tightness'},
- ']' => $rOpts->{'square-bracket-vertical-tightness'},
- );
+ # require that this block be entirely on one line
+ next if ( $is_broken_block->($type_sequence) );
- %closing_vertical_tightness = (
- '(' => $rOpts->{'paren-vertical-tightness-closing'},
- '{' => $rOpts->{'brace-vertical-tightness-closing'},
- '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
- ')' => $rOpts->{'paren-vertical-tightness-closing'},
- '}' => $rOpts->{'brace-vertical-tightness-closing'},
- ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
- );
+ # See if this block fits on one line of allowed length (which may
+ # be different from the input script)
+ $starting_lentot =
+ $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+ $starting_indent = 0;
+ if ( !$rOpts_variable_maximum_line_length ) {
+ my $level = $rLL->[$KK]->[_LEVEL_];
+ $starting_indent = $rOpts_indent_columns * $level;
+ }
- # assume flag for '>' same as ')' for closing qw quotes
- %closing_token_indentation = (
- ')' => $rOpts->{'closing-paren-indentation'},
- '}' => $rOpts->{'closing-brace-indentation'},
- ']' => $rOpts->{'closing-square-bracket-indentation'},
- '>' => $rOpts->{'closing-paren-indentation'},
- );
+ # Dump the stack if block is too long and skip this block
+ if ( $excess_length_to_K->($K_closing) > 0 ) {
+ @open_block_stack = ();
+ next;
+ }
- # flag indicating if any closing tokens are indented
- $some_closing_token_indentation =
- $rOpts->{'closing-paren-indentation'}
- || $rOpts->{'closing-brace-indentation'}
- || $rOpts->{'closing-square-bracket-indentation'}
- || $rOpts->{'indent-closing-brace'};
+ # OK, Block passes tests, remember it
+ push @open_block_stack, $type_sequence;
- %opening_token_right = (
- '(' => $rOpts->{'opening-paren-right'},
- '{' => $rOpts->{'opening-hash-brace-right'},
- '[' => $rOpts->{'opening-square-bracket-right'},
- );
+ # We are only marking nested code blocks,
+ # so check for a previous block on the stack
+ next unless ( @open_block_stack > 1 );
- %stack_opening_token = (
- '(' => $rOpts->{'stack-opening-paren'},
- '{' => $rOpts->{'stack-opening-hash-brace'},
- '[' => $rOpts->{'stack-opening-square-bracket'},
- );
+ # Looks OK, mark this as a short nested block
+ $rshort_nested->{$type_sequence} = 1;
- %stack_closing_token = (
- ')' => $rOpts->{'stack-closing-paren'},
- '}' => $rOpts->{'stack-closing-hash-brace'},
- ']' => $rOpts->{'stack-closing-square-bracket'},
- );
+ }
return;
}
-sub bad_pattern {
+sub adjust_indentation_levels {
- # See if a pattern will compile. We have to use a string eval here,
- # but it should be safe because the pattern has been constructed
- # by this program.
- my ($pattern) = @_;
- eval "'##'=~/$pattern/";
- return $@;
-}
-
-{ ## begin closure prepare_cuddled_block_types
-
- my %no_cuddle;
+ my ($self) = @_;
- # Add keywords here which really should not be cuddled
- BEGIN {
- my @q = qw(if unless for foreach while);
- @no_cuddle{@q} = (1) x scalar(@q);
- }
+ # Called once per file to do special indentation adjustments.
+ # These routines adjust levels either by changing _CI_LEVEL_ directly or
+ # by setting modified levels in the array $self->[_radjusted_levels_].
+ # They will create this array if they are active, and otherwise it will be
+ # an empty array for later efficiency.
- sub prepare_cuddled_block_types {
+ # Set adjusted levels for any non-indenting braces.
+ # If this option is used it will create the _radjusted_levels_ array.
+ # Important: This must be the first routine called which touches
+ # _radjusted_levels_
+ $self->non_indenting_braces();
- # the cuddled-else style, if used, is controlled by a hash that
- # we construct here
+ # Adjust indentation for list containers
+ $self->adjust_container_indentation();
- # Include keywords here which should not be cuddled
+ # Set adjusted levels for the whitespace cycle option.
+ $self->whitespace_cycle_adjustment();
- my $cuddled_string = "";
- if ( $rOpts->{'cuddled-else'} ) {
+ # Adjust continuation indentation if -bli is set
+ $self->bli_adjustment();
- # set the default
- $cuddled_string = 'elsif else continue catch finally'
- unless ( $rOpts->{'cuddled-block-list-exclusive'} );
+ # Now clip any adjusted levels to be non-negative
+ $self->clip_adjusted_levels();
- # This is the old equivalent but more complex version
- # $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
+ return;
+}
- # Add users other blocks to be cuddled
- my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
- if ($cuddled_block_list) {
- $cuddled_string .= " " . $cuddled_block_list;
- }
+sub initialize_adjusted_levels {
+ my ($self) = @_;
+ # Initialize _radjusted_levels if it has not yet been initialized.
+ # It is only needed when certain special adjustments are done.
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ my $rLL = $self->[_rLL_];
+ my $Kmax = @{$rLL} - 1;
+ if ( !defined($radjusted_levels) || ( @{$radjusted_levels} != @{$rLL} ) ) {
+ foreach my $KK ( 0 .. $Kmax ) {
+ $radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_];
}
+ }
+ return;
+}
- # If we have a cuddled string of the form
- # 'try-catch-finally'
-
- # we want to prepare a hash of the form
+sub clip_adjusted_levels {
- # $rcuddled_block_types = {
- # 'try' => {
- # 'catch' => 1,
- # 'finally' => 1
- # },
- # };
+ # Replace any negative adjusted levels with zero.
+ # Negative levels can occur in files with brace errors.
+ my ($self) = @_;
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ return unless defined($radjusted_levels) && @{$radjusted_levels};
+ foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) }
+ return;
+}
- # use -dcbl to dump this hash
+sub non_indenting_braces {
- # Multiple such strings are input as a space or comma separated list
+ # Called once per file to handle the --non-indenting-braces parameter.
+ # Remove indentation within marked braces if requested
+ # NOTE: This must be the first routine to reference $radjusted_levels;
+ my ($self) = @_;
+ return unless ( $rOpts->{'non-indenting-braces'} );
- # If we get two lists with the same leading type, such as
- # -cbl = "-try-catch-finally -try-catch-otherwise"
- # then they will get merged as follows:
- # $rcuddled_block_types = {
- # 'try' => {
- # 'catch' => 1,
- # 'finally' => 2,
- # 'otherwise' => 1,
- # },
- # };
- # This will allow either type of chain to be followed.
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
- $cuddled_string =~ s/,/ /g; # allow space or comma separated lists
- my @cuddled_strings = split /\s+/, $cuddled_string;
+ my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
- $rcuddled_block_types = {};
+ my $radjusted_levels;
+ my $Kmax = @{$rLL} - 1;
+ my @seqno_stack;
- # process each dash-separated string...
- my $string_count = 0;
- foreach my $string (@cuddled_strings) {
- next unless $string;
- my @words = split /-+/, $string; # allow multiple dashes
+ my $is_non_indenting_brace = sub {
+ my ($KK) = @_;
- # we could look for and report possible errors here...
- next unless ( @words > 0 );
+ # looking for an opening block brace
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
+ return unless ( $token eq '{' && $block_type );
- # allow either '-continue' or *-continue' for arbitrary starting type
- my $start = '*';
+ # followed by a comment
+ my $K_sc = $self->K_next_nonblank($KK);
+ return unless defined($K_sc);
+ my $type_sc = $rLL->[$K_sc]->[_TYPE_];
+ return unless ( $type_sc eq '#' );
- # a single word without dashes is a secondary block type
- if ( @words > 1 ) {
- $start = shift @words;
- }
+ # on the same line
+ my $line_index = $rLL->[$KK]->[_LINE_INDEX_];
+ my $line_index_sc = $rLL->[$K_sc]->[_LINE_INDEX_];
+ return unless ( $line_index_sc == $line_index );
- # always make an entry for the leading word. If none follow, this
- # will still prevent a wildcard from matching this word.
- if ( !defined( $rcuddled_block_types->{$start} ) ) {
- $rcuddled_block_types->{$start} = {};
- }
+ # get the side comment text
+ my $token_sc = $rLL->[$K_sc]->[_TOKEN_];
- # The count gives the original word order in case we ever want it.
- $string_count++;
- my $word_count = 0;
- foreach my $word (@words) {
- next unless $word;
- if ( $no_cuddle{$word} ) {
- Warn(
-"## Ignoring keyword '$word' in -cbl; does not seem right\n"
- );
- next;
- }
- $word_count++;
- $rcuddled_block_types->{$start}->{$word} =
- 1; #"$string_count.$word_count";
+ # The pattern ends in \s but we have removed the newline, so
+ # we added it back for the match. That way we require an exact
+ # match to the special string and also allow additional text.
+ $token_sc .= "\n";
+ my $is_nib = ( $token_sc =~ /$non_indenting_brace_pattern/ );
+ if ($is_nib) { $rspecial_side_comment_type->{$K_sc} = 'NIB' }
+ return $is_nib;
+ };
- # git#9: Remove this word from the list of desired one-line
- # blocks
- $want_one_line_block{$word} = 0;
+ foreach my $KK ( 0 .. $Kmax ) {
+ my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ my $level_abs = $rLL->[$KK]->[_LEVEL_];
+ my $level = $level_abs;
+ my $num = @seqno_stack;
+ if ($seqno) {
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ if ( $token eq '{' && $is_non_indenting_brace->($KK) ) {
+ push @seqno_stack, $seqno;
+ }
+ if ( $token eq '}' && @seqno_stack && $seqno_stack[-1] == $seqno ) {
+ pop @seqno_stack;
+ $num -= 1;
}
}
- return;
+ $radjusted_levels->[$KK] = $level - $num;
}
-} ## begin closure prepare_cuddled_block_types
-
-sub dump_cuddled_block_list {
- my ($fh) = @_;
-
- # ORIGINAL METHOD: Here is the format of the cuddled block type hash
- # which controls this routine
- # my $rcuddled_block_types = {
- # 'if' => {
- # 'else' => 1,
- # 'elsif' => 1
- # },
- # 'try' => {
- # 'catch' => 1,
- # 'finally' => 1
- # },
- # };
+ $self->[_radjusted_levels_] = $radjusted_levels;
+ return;
+}
- # SIMPLFIED METHOD: the simplified method uses a wildcard for
- # the starting block type and puts all cuddled blocks together:
- # my $rcuddled_block_types = {
- # '*' => {
- # 'else' => 1,
- # 'elsif' => 1
- # 'catch' => 1,
- # 'finally' => 1
- # },
- # };
+sub whitespace_cycle_adjustment {
- # Both methods work, but the simplified method has proven to be adequate and
- # easier to manage.
+ my $self = shift;
- my $cuddled_string = $rOpts->{'cuddled-block-list'};
- $cuddled_string = '' unless $cuddled_string;
+ # Called once per file to implement the --whitespace-cycle option
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+ my $radjusted_levels = $self->[_radjusted_levels_];
- my $flags = "";
- $flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
- $flags .= " -cbl='$cuddled_string'";
+ my $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
+ if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
- unless ( $rOpts->{'cuddled-else'} ) {
- $flags .= "\nNote: You must specify -ce to generate a cuddled hash";
- }
+ my $Kmax = @{$rLL} - 1;
- $fh->print(<<EOM);
-------------------------------------------------------------------------
-Hash of cuddled block types prepared for a run with these parameters:
- $flags
-------------------------------------------------------------------------
-EOM
+ $self->initialize_adjusted_levels();
- use Data::Dumper;
- $fh->print( Dumper($rcuddled_block_types) );
+ my $whitespace_last_level = -1;
+ my @whitespace_level_stack = ();
+ my $last_nonblank_type = 'b';
+ my $last_nonblank_token = '';
+ foreach my $KK ( 0 .. $Kmax ) {
+ my $level_abs = $radjusted_levels->[$KK]; ##$rLL->[$KK]->[_LEVEL_];
+ my $level = $level_abs;
+ if ( $level_abs < $whitespace_last_level ) {
+ pop(@whitespace_level_stack);
+ }
+ if ( !@whitespace_level_stack ) {
+ push @whitespace_level_stack, $level_abs;
+ }
+ elsif ( $level_abs > $whitespace_last_level ) {
+ $level = $whitespace_level_stack[-1] +
+ ( $level_abs - $whitespace_last_level );
- $fh->print(<<EOM);
-------------------------------------------------------------------------
-EOM
- return;
-}
-
-sub make_static_block_comment_pattern {
+ if (
+ # 1 Try to break at a block brace
+ (
+ $level > $rOpts_whitespace_cycle
+ && $last_nonblank_type eq '{'
+ && $last_nonblank_token eq '{'
+ )
- # create the pattern used to identify static block comments
- $static_block_comment_pattern = '^\s*##';
+ # 2 Then either a brace or bracket
+ || ( $level > $rOpts_whitespace_cycle + 1
+ && $last_nonblank_token =~ /^[\{\[]$/ )
- # allow the user to change it
- if ( $rOpts->{'static-block-comment-prefix'} ) {
- my $prefix = $rOpts->{'static-block-comment-prefix'};
- $prefix =~ s/^\s*//;
- my $pattern = $prefix;
+ # 3 Then a paren too
+ || $level > $rOpts_whitespace_cycle + 2
+ )
+ {
+ $level = 1;
+ }
+ push @whitespace_level_stack, $level;
+ }
+ $level = $whitespace_level_stack[-1];
+ $radjusted_levels->[$KK] = $level;
- # user may give leading caret to force matching left comments only
- if ( $prefix !~ /^\^#/ ) {
- if ( $prefix !~ /^#/ ) {
- Die(
-"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
- );
+ $whitespace_last_level = $level_abs;
+ my $type = $rLL->[$KK]->[_TYPE_];
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ if ( $type ne 'b' ) {
+ $last_nonblank_type = $type;
+ $last_nonblank_token = $token;
}
- $pattern = '^\s*' . $prefix;
- }
- if ( bad_pattern($pattern) ) {
- Die(
-"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
- );
}
- $static_block_comment_pattern = $pattern;
}
+ $self->[_radjusted_levels_] = $radjusted_levels;
return;
}
-sub make_format_skipping_pattern {
- my ( $opt_name, $default ) = @_;
- my $param = $rOpts->{$opt_name};
- unless ($param) { $param = $default }
- $param =~ s/^\s*//;
- if ( $param !~ /^#/ ) {
- Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
- }
- my $pattern = '^' . $param . '\s';
- if ( bad_pattern($pattern) ) {
- Die(
-"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
- );
- }
- return $pattern;
-}
+sub adjust_container_indentation {
-sub make_non_indenting_brace_pattern {
+ # Called once per file to implement the -bbhb* and related flags:
- # Create the pattern used to identify static side comments.
- # Note that we are ending the pattern in a \s. This will allow
- # the pattern to be followed by a space and some text, or a newline.
- # The pattern is used in sub 'non_indenting_braces'
- $non_indenting_brace_pattern = '^#<<<\s';
+ # -bbhbi=n
+ # -bbsbi=n
+ # -bbpi=n
- # allow the user to change it
- if ( $rOpts->{'non-indenting-brace-prefix'} ) {
- my $prefix = $rOpts->{'non-indenting-brace-prefix'};
- $prefix =~ s/^\s*//;
- if ( $prefix !~ /^#/ ) {
- Die("ERROR: the -nibp parameter '$prefix' must begin with '#'\n");
- }
- my $pattern = '^' . $prefix . '\s';
- if ( bad_pattern($pattern) ) {
- Die(
-"ERROR: the -nibp prefix '$prefix' causes the invalid regex '$pattern'\n"
- );
+ # where:
+
+ # n=0 default indentation (usually one ci)
+ # n=1 outdent one ci
+ # n=2 indent one level (minus one ci)
+ # n=3 indent one extra ci [This may be dropped]
+
+ my ($self) = @_;
+
+ return unless %container_indentation_options;
+
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+
+ # Option 2 needs the following array:
+ my $radjusted_levels = $self->[_radjusted_levels_];
+
+ # We will only initialize it if option 2 has been selected
+ foreach my $key (%container_indentation_options) {
+ my $val = $container_indentation_options{$key};
+ if ( defined($val) && $val == 2 ) {
+ $self->initialize_adjusted_levels();
+ last;
}
- $non_indenting_brace_pattern = $pattern;
}
- return;
-}
-sub make_closing_side_comment_list_pattern {
+ # Loop over all opening container tokens
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $ris_broken_container = $self->[_ris_broken_container_];
+ foreach my $seqno ( keys %{$K_opening_container} ) {
+ my $KK = $K_opening_container->{$seqno};
- # turn any input list into a regex for recognizing selected block types
- $closing_side_comment_list_pattern = '^\w+';
- if ( defined( $rOpts->{'closing-side-comment-list'} )
- && $rOpts->{'closing-side-comment-list'} )
- {
- $closing_side_comment_list_pattern =
- make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
- }
- return;
-}
+ # this routine is not for code block braces
+ my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
+ next if ($block_type);
-sub make_sub_matching_pattern {
+ # These flags only apply if the corresponding -bb* flags
+ # have been set to non-default values
+ my $rtoken_vars = $rLL->[$KK];
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $flag = $container_indentation_options{$token};
+ next unless ($flag);
- # Patterns for standardizing matches to block types for regular subs and
- # anonymous subs. Examples
- # 'sub process' is a named sub
- # 'sub ::m' is a named sub
- # 'sub' is an anonymous sub
- # 'sub:' is a label, not a sub
- # 'substr' is a keyword
- $SUB_PATTERN = '^sub\s+(::|\w)'; # match normal sub
- $ASUB_PATTERN = '^sub$'; # match anonymous sub
- $ANYSUB_PATTERN = '^sub\b'; # match either type of sub
+ # Require previous nonblank to be certain types (= and =>)
+ # Note similar coding in sub insert_breaks_before...
+ my $Kprev = $KK - 1;
+ next if ( $Kprev < 0 );
+ my $prev_type = $rLL->[$Kprev]->[_TYPE_];
+ if ( $prev_type eq 'b' ) {
+ $Kprev--;
+ next if ( $Kprev < 0 );
+ $prev_type = $rLL->[$Kprev]->[_TYPE_];
+ }
+ next unless ( $is_equal_or_fat_comma{$prev_type} );
- # Note (see also RT #133130): These patterns are used by
- # sub make_block_pattern, which is used for making most patterns.
- # So this sub needs to be called before other pattern-making routines.
+ # This is only for list containers
+ next unless $self->is_list($seqno);
- if ( $rOpts->{'sub-alias-list'} ) {
+ # and only for broken lists
+ next unless $ris_broken_container->{$seqno};
- # Note that any 'sub-alias-list' has been preprocessed to
- # be a trimmed, space-separated list which includes 'sub'
- # for example, it might be 'sub method fun'
- my $sub_alias_list = $rOpts->{'sub-alias-list'};
- $sub_alias_list =~ s/\s+/\|/g;
- $SUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
- $ASUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
- $ANYSUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
- }
- return;
-}
+ # NOTE: We are adjusting indentation of the opening container. The
+ # closing container will normally follow the indentation of the opening
+ # container automatically, so this is not currently done.
+ my $ci = $rLL->[$KK]->[_CI_LEVEL_];
+ next unless ($ci);
-sub make_bli_pattern {
+ # option 1: outdent
+ if ( $flag == 1 ) {
+ $ci -= 1;
+ }
- # default list of block types for which -bli would apply
- my $bli_list_string = 'if else elsif unless while for foreach do : sub';
+ # option 2: indent one level
+ elsif ( $flag == 2 ) {
+ $ci -= 1;
+ $radjusted_levels->[$KK] += 1;
+ }
- if ( defined( $rOpts->{'brace-left-and-indent-list'} )
- && $rOpts->{'brace-left-and-indent-list'} )
- {
- $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
+ # option 3: for testing only, probably will be deleted
+ elsif ( $flag == 3 ) {
+ $ci += 1;
+ }
+ $rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 );
}
-
- $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
return;
}
-sub make_keyword_group_list_pattern {
+sub bli_adjustment {
- # turn any input list into a regex for recognizing selected block types.
- # Here are the defaults:
- $keyword_group_list_pattern = '^(our|local|my|use|require|)$';
- $keyword_group_list_comment_pattern = '';
- if ( defined( $rOpts->{'keyword-group-blanks-list'} )
- && $rOpts->{'keyword-group-blanks-list'} )
- {
- my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
- my @keyword_list;
- my @comment_list;
- foreach my $word (@words) {
- if ( $word =~ /^(BC|SBC)$/ ) {
- push @comment_list, $word;
- if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
- }
- else {
- push @keyword_list, $word;
- }
+ # Called once per file to implement the --brace-left-and-indent option.
+ # If -bli is set, adds one continuation indentation for certain braces
+ my $self = shift;
+ return unless ( $rOpts->{'brace-left-and-indent'} );
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+ my $KNEXT = 0;
+ while ( defined($KNEXT) ) {
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
+ my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
+ if ( $block_type && $block_type =~ /$bli_pattern/ ) {
+ $rLL->[$KK]->[_CI_LEVEL_]++;
}
- $keyword_group_list_pattern =
- make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
- $keyword_group_list_comment_pattern =
- make_block_pattern( '-kgbl', join( ' ', @comment_list ) );
}
return;
}
-sub make_block_brace_vertical_tightness_pattern {
+######################################
+# CODE SECTION 6: Process line-by-line
+######################################
- # turn any input list into a regex for recognizing selected block types
- $block_brace_vertical_tightness_pattern =
- '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
- if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
- && $rOpts->{'block-brace-vertical-tightness-list'} )
- {
- $block_brace_vertical_tightness_pattern =
- make_block_pattern( '-bbvtl',
- $rOpts->{'block-brace-vertical-tightness-list'} );
- }
- return;
-}
+sub process_all_lines {
-sub make_blank_line_pattern {
+ # Main loop over all lines of a file.
+ # Lines are processed according to type.
- $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
- my $key = 'blank-lines-before-closing-block-list';
- if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
- $blank_lines_before_closing_block_pattern =
- make_block_pattern( '-blbcl', $rOpts->{$key} );
- }
+ my $self = shift;
+ my $rlines = $self->[_rlines_];
+ my $sink_object = $self->[_sink_object_];
+ my $fh_tee = $self->[_fh_tee_];
+ my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
+ my $file_writer_object = $self->[_file_writer_object_];
- $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
- $key = 'blank-lines-after-opening-block-list';
- if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
- $blank_lines_after_opening_block_pattern =
- make_block_pattern( '-blaol', $rOpts->{$key} );
- }
- return;
-}
+ # Note for RT#118553, leave only one newline at the end of a file.
+ # Example code to do this is in comments below:
+ # my $Opt_trim_ending_blank_lines = 0;
+ # if ($Opt_trim_ending_blank_lines) {
+ # while ( my $line_of_tokens = pop @{$rlines} ) {
+ # my $line_type = $line_of_tokens->{_line_type};
+ # if ( $line_type eq 'CODE' ) {
+ # my $CODE_type = $line_of_tokens->{_code_type};
+ # next if ( $CODE_type eq 'BL' );
+ # }
+ # push @{$rlines}, $line_of_tokens;
+ # last;
+ # }
+ # }
-sub make_block_pattern {
+ # But while this would be a trivial update, it would have very undesirable
+ # side effects when perltidy is run from within an editor on a small snippet.
+ # So this is best done with a separate filter, such
+ # as 'delete_ending_blank_lines.pl' in the examples folder.
- # given a string of block-type keywords, return a regex to match them
- # The only tricky part is that labels are indicated with a single ':'
- # and the 'sub' token text may have additional text after it (name of
- # sub).
- #
- # Example:
- #
- # input string: "if else elsif unless while for foreach do : sub";
- # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
+ # Flag to prevent blank lines when POD occurs in a format skipping sect.
+ my $in_format_skipping_section;
- # Minor Update:
- #
- # To distinguish between anonymous subs and named subs, use 'sub' to
- # indicate a named sub, and 'asub' to indicate an anonymous sub
+ # set locations for blanks around long runs of keywords
+ my $rwant_blank_line_after = $self->keyword_group_scan();
- my ( $abbrev, $string ) = @_;
- my @list = split_words($string);
- my @words = ();
- my %seen;
- for my $i (@list) {
- if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
- next if $seen{$i};
- $seen{$i} = 1;
- if ( $i eq 'sub' ) {
- }
- elsif ( $i eq 'asub' ) {
- }
- elsif ( $i eq ';' ) {
- push @words, ';';
- }
- elsif ( $i eq '{' ) {
- push @words, '\{';
- }
- elsif ( $i eq ':' ) {
- push @words, '\w+:';
- }
- elsif ( $i =~ /^\w/ ) {
- push @words, $i;
- }
- else {
- Warn("unrecognized block type $i after $abbrev, ignoring\n");
+ my $line_type = "";
+ my $i = -1;
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ $i++;
+
+ # insert blank lines requested for keyword sequences
+ if ( $i > 0
+ && defined( $rwant_blank_line_after->{ $i - 1 } )
+ && $rwant_blank_line_after->{ $i - 1 } == 1 )
+ {
+ $self->want_blank_line();
}
- }
- my $pattern = '(' . join( '|', @words ) . ')$';
- my $sub_patterns = "";
- if ( $seen{'sub'} ) {
- $sub_patterns .= '|' . $SUB_PATTERN;
- }
- if ( $seen{'asub'} ) {
- $sub_patterns .= '|' . $ASUB_PATTERN;
- }
- if ($sub_patterns) {
- $pattern = '(' . $pattern . $sub_patterns . ')';
- }
- $pattern = '^' . $pattern;
- return $pattern;
-}
-sub make_static_side_comment_pattern {
+ my $last_line_type = $line_type;
+ $line_type = $line_of_tokens->{_line_type};
+ my $input_line = $line_of_tokens->{_line_text};
- # create the pattern used to identify static side comments
- $static_side_comment_pattern = '^##';
+ # _line_type codes are:
+ # SYSTEM - system-specific code before hash-bang line
+ # CODE - line of perl code (including comments)
+ # POD_START - line starting pod, such as '=head'
+ # POD - pod documentation text
+ # POD_END - last line of pod section, '=cut'
+ # HERE - text of here-document
+ # HERE_END - last line of here-doc (target word)
+ # FORMAT - format section
+ # FORMAT_END - last line of format section, '.'
+ # DATA_START - __DATA__ line
+ # DATA - unidentified text following __DATA__
+ # END_START - __END__ line
+ # END - unidentified text following __END__
+ # ERROR - we are in big trouble, probably not a perl script
- # allow the user to change it
- if ( $rOpts->{'static-side-comment-prefix'} ) {
- my $prefix = $rOpts->{'static-side-comment-prefix'};
- $prefix =~ s/^\s*//;
- my $pattern = '^' . $prefix;
- if ( bad_pattern($pattern) ) {
- Die(
-"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
- );
+ # put a blank line after an =cut which comes before __END__ and __DATA__
+ # (required by podchecker)
+ if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) {
+ $file_writer_object->reset_consecutive_blank_lines();
+ if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
+ $self->want_blank_line();
+ }
}
- $static_side_comment_pattern = $pattern;
- }
- return;
-}
-sub make_closing_side_comment_prefix {
+ # handle line of code..
+ if ( $line_type eq 'CODE' ) {
- # Be sure we have a valid closing side comment prefix
- my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
- my $csc_prefix_pattern;
- if ( !defined($csc_prefix) ) {
- $csc_prefix = '## end';
- $csc_prefix_pattern = '^##\s+end';
- }
- else {
- my $test_csc_prefix = $csc_prefix;
- if ( $test_csc_prefix !~ /^#/ ) {
- $test_csc_prefix = '#' . $test_csc_prefix;
- }
+ my $CODE_type = $line_of_tokens->{_code_type};
+ $in_format_skipping_section = $CODE_type eq 'FS';
- # make a regex to recognize the prefix
- my $test_csc_prefix_pattern = $test_csc_prefix;
+ # Handle blank lines
+ if ( $CODE_type eq 'BL' ) {
- # escape any special characters
- $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
+ # If keep-old-blank-lines is zero, we delete all
+ # old blank lines and let the blank line rules generate any
+ # needed blanks.
- $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
+ # and delete lines requested by the keyword-group logic
+ my $kgb_keep = !( defined( $rwant_blank_line_after->{$i} )
+ && $rwant_blank_line_after->{$i} == 2 );
- # allow exact number of intermediate spaces to vary
- $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
+ # But: the keep-old-blank-lines flag has priority over kgb flags
+ $kgb_keep = 1 if ( $rOpts_keep_old_blank_lines == 2 );
- # make sure we have a good pattern
- # if we fail this we probably have an error in escaping
- # characters.
+ if ( $rOpts_keep_old_blank_lines && $kgb_keep ) {
+ $self->flush($CODE_type);
+ $file_writer_object->write_blank_code_line(
+ $rOpts_keep_old_blank_lines == 2 );
+ $self->[_last_line_leading_type_] = 'b';
+ }
+ next;
+ }
+ else {
- if ( bad_pattern($test_csc_prefix_pattern) ) {
+ # let logger see all non-blank lines of code
+ my $output_line_number = $self->get_output_line_number();
+ black_box( $line_of_tokens, $output_line_number );
+ }
- # shouldn't happen..must have screwed up escaping, above
- report_definite_bug();
- Warn(
-"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
- );
+ # Handle Format Skipping (FS) and Verbatim (VB) Lines
+ if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
+ $self->write_unindented_line("$input_line");
+ $file_writer_object->reset_consecutive_blank_lines();
+ next;
+ }
- # just warn and keep going with defaults
- Warn("Please consider using a simpler -cscp prefix\n");
- Warn("Using default -cscp instead; please check output\n");
+ # Handle all other lines of code
+ $self->process_line_of_CODE($line_of_tokens);
}
+
+ # handle line of non-code..
else {
- $csc_prefix = $test_csc_prefix;
- $csc_prefix_pattern = $test_csc_prefix_pattern;
- }
- }
- $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
- $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
- return;
-}
-sub dump_want_left_space {
- my $fh = shift;
- local $" = "\n";
- $fh->print(<<EOM);
-These values are the main control of whitespace to the left of a token type;
-They may be altered with the -wls parameter.
-For a list of token types, use perltidy --dump-token-types (-dtt)
- 1 means the token wants a space to its left
--1 means the token does not want a space to its left
-------------------------------------------------------------------------
-EOM
- foreach my $key ( sort keys %want_left_space ) {
- $fh->print("$key\t$want_left_space{$key}\n");
- }
- return;
-}
+ # set special flags
+ my $skip_line = 0;
+ if ( $line_type =~ /^POD/ ) {
-sub dump_want_right_space {
- my $fh = shift;
- local $" = "\n";
- $fh->print(<<EOM);
-These values are the main control of whitespace to the right of a token type;
-They may be altered with the -wrs parameter.
-For a list of token types, use perltidy --dump-token-types (-dtt)
- 1 means the token wants a space to its right
--1 means the token does not want a space to its right
-------------------------------------------------------------------------
-EOM
- foreach my $key ( sort keys %want_right_space ) {
- $fh->print("$key\t$want_right_space{$key}\n");
+ # Pod docs should have a preceding blank line. But stay
+ # out of __END__ and __DATA__ sections, because
+ # the user may be using this section for any purpose whatsoever
+ if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
+ if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
+ if ( !$skip_line
+ && !$in_format_skipping_section
+ && $line_type eq 'POD_START'
+ && !$self->[_saw_END_or_DATA_] )
+ {
+ $self->want_blank_line();
+ }
+ if ( $rOpts->{'tee-pod'} ) {
+ $fh_tee->print($input_line) if ($fh_tee);
+ }
+ }
+
+ # leave the blank counters in a predictable state
+ # after __END__ or __DATA__
+ elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
+ $file_writer_object->reset_consecutive_blank_lines();
+ $self->[_saw_END_or_DATA_] = 1;
+ }
+
+ # write unindented non-code line
+ if ( !$skip_line ) {
+ $self->write_unindented_line($input_line);
+ }
+ }
}
return;
-}
-{ ## begin closure is_essential_whitespace
+} ## end sub process_all_lines
- my %is_sort_grep_map;
- my %is_for_foreach;
- my %is_digraph;
- my %is_trigraph;
+sub keyword_group_scan {
+ my $self = shift;
- BEGIN {
+ # Called once per file to process the --keyword-group-blanks-* parameters.
- my @q;
- @q = qw(sort grep map);
- @is_sort_grep_map{@q} = (1) x scalar(@q);
+ # Manipulate blank lines around keyword groups (kgb* flags)
+ # Scan all lines looking for runs of consecutive lines beginning with
+ # selected keywords. Example keywords are 'my', 'our', 'local', ... but
+ # they may be anything. We will set flags requesting that blanks be
+ # inserted around and within them according to input parameters. Note
+ # that we are scanning the lines as they came in in the input stream, so
+ # they are not necessarily well formatted.
- @q = qw(for foreach);
- @is_for_foreach{@q} = (1) x scalar(@q);
+ # The output of this sub is a return hash ref whose keys are the indexes of
+ # lines after which we desire a blank line. For line index i:
+ # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
+ # $rhash_of_desires->{$i} = 2 means we want blank line $i removed
+ my $rhash_of_desires = {};
- @q = qw(
- .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
- <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
- );
- @is_digraph{@q} = (1) x scalar(@q);
+ my $Opt_blanks_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb'
+ my $Opt_blanks_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba'
+ my $Opt_blanks_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi'
+ my $Opt_blanks_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd'
+ my $Opt_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs'
- @q = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
- @is_trigraph{@q} = (1) x scalar(@q);
+ # A range of sizes can be input with decimal notation like 'min.max' with
+ # any number of dots between the two numbers. Examples:
+ # string => min max matches
+ # 1.1 1 1 exactly 1
+ # 1.3 1 3 1,2, or 3
+ # 1..3 1 3 1,2, or 3
+ # 5 5 - 5 or more
+ # 6. 6 - 6 or more
+ # .2 - 2 up to 2
+ # 1.0 1 0 nothing
+ my ( $Opt_size_min, $Opt_size_max ) = split /\.+/, $Opt_size;
+ if ( $Opt_size_min && $Opt_size_min !~ /^\d+$/
+ || $Opt_size_max && $Opt_size_max !~ /^\d+$/ )
+ {
+ Warn(<<EOM);
+Unexpected value for -kgbs: '$Opt_size'; expecting 'min' or 'min.max';
+ignoring all -kgb flags
+EOM
+
+ # Turn this option off so that this message does not keep repeating
+ # during iterations and other files.
+ $rOpts->{'keyword-group-blanks-size'} = "";
+ return $rhash_of_desires;
}
+ $Opt_size_min = 1 unless ($Opt_size_min);
- sub is_essential_whitespace {
+ if ( $Opt_size_max && $Opt_size_max < $Opt_size_min ) {
+ return $rhash_of_desires;
+ }
- # Essential whitespace means whitespace which cannot be safely deleted
- # without risking the introduction of a syntax error.
- # We are given three tokens and their types:
- # ($tokenl, $typel) is the token to the left of the space in question
- # ($tokenr, $typer) is the token to the right of the space in question
- # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
- #
- # This is a slow routine but is not needed too often except when -mangle
- # is used.
- #
- # Note: This routine should almost never need to be changed. It is
- # for avoiding syntax problems rather than for formatting.
+ # codes for $Opt_blanks_before and $Opt_blanks_after:
+ # 0 = never (delete if exist)
+ # 1 = stable (keep unchanged)
+ # 2 = always (insert if missing)
- my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
+ return $rhash_of_desires
+ unless $Opt_size_min > 0
+ && ( $Opt_blanks_before != 1
+ || $Opt_blanks_after != 1
+ || $Opt_blanks_inside
+ || $Opt_blanks_delete );
- my $tokenr_is_bareword = $tokenr =~ /^\w/ && $tokenr !~ /^\d/;
- my $tokenr_is_open_paren = $tokenr eq '(';
- my $token_joined = $tokenl . $tokenr;
- my $tokenl_is_dash = $tokenl eq '-';
+ my $Opt_pattern = $keyword_group_list_pattern;
+ my $Opt_comment_pattern = $keyword_group_list_comment_pattern;
+ my $Opt_repeat_count =
+ $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr'
- my $result =
+ my $rlines = $self->[_rlines_];
+ my $rLL = $self->[_rLL_];
+ my $K_closing_container = $self->[_K_closing_container_];
- # never combine two bare words or numbers
- # examples: and ::ok(1)
- # return ::spw(...)
- # for bla::bla:: abc
- # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
- # $input eq"quit" to make $inputeq"quit"
- # my $size=-s::SINK if $file; <==OK but we won't do it
- # don't join something like: for bla::bla:: abc
- # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
- ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
- && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
+ # variables for the current group and subgroups:
+ my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group,
+ @subgroup );
- # do not combine a number with a concatenation dot
- # example: pom.caputo:
- # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
- || $typel eq 'n' && $tokenr eq '.'
- || $typer eq 'n'
- && $tokenl eq '.'
+ # Definitions:
+ # ($ibeg, $iend) = starting and ending line indexes of this entire group
+ # $count = total number of keywords seen in this entire group
+ # $level_beg = indententation level of this group
+ # @group = [ $i, $token, $count ] =list of all keywords & blanks
+ # @subgroup = $j, index of group where token changes
+ # @iblanks = line indexes of blank lines in input stream in this group
+ # where i=starting line index
+ # token (the keyword)
+ # count = number of this token in this subgroup
+ # j = index in group where token changes
+ #
+ # These vars will contain values for the most recently seen line:
+ my ( $line_type, $CODE_type, $K_first, $K_last );
- # cases of a space before a bareword...
- || (
- $tokenr_is_bareword && (
+ my $number_of_groups_seen = 0;
- # do not join a minus with a bare word, because you might form
- # a file test operator. Example from Complex.pm:
- # if (CORE::abs($z - i) < $eps);
- # "z-i" would be taken as a file test.
- $tokenl_is_dash && length($tokenr) == 1
+ ####################
+ # helper subroutines
+ ####################
- # and something like this could become ambiguous without space
- # after the '-':
- # use constant III=>1;
- # $a = $b - III;
- # and even this:
- # $a = - III;
- || $tokenl_is_dash && $typer =~ /^[wC]$/
+ my $insert_blank_after = sub {
+ my ($i) = @_;
+ $rhash_of_desires->{$i} = 1;
+ my $ip = $i + 1;
+ if ( defined( $rhash_of_desires->{$ip} )
+ && $rhash_of_desires->{$ip} == 2 )
+ {
+ $rhash_of_desires->{$ip} = 0;
+ }
+ return;
+ };
- # keep a space between a quote and a bareword to prevent the
- # bareword from becoming a quote modifier.
- || $typel eq 'Q'
+ my $split_into_sub_groups = sub {
- # keep a space between a token ending in '$' and any word;
- # this caused trouble: "die @$ if $@"
- || $typel eq 'i' && $tokenl =~ /\$$/
+ # place blanks around long sub-groups of keywords
+ # ...if requested
+ return unless ($Opt_blanks_inside);
- # do not remove space between an '&' and a bare word because
- # it may turn into a function evaluation, like here
- # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
- # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
- || $typel eq '&'
+ # loop over sub-groups, index k
+ push @subgroup, scalar @group;
+ my $kbeg = 1;
+ my $kend = @subgroup - 1;
+ for ( my $k = $kbeg ; $k <= $kend ; $k++ ) {
- # don't combine $$ or $# with any alphanumeric
- # (testfile mangle.t with --mangle)
- || $tokenl =~ /^\$[\$\#]$/
+ # index j runs through all keywords found
+ my $j_b = $subgroup[ $k - 1 ];
+ my $j_e = $subgroup[$k] - 1;
- )
- ) ## end $tokenr_is_bareword
+ # index i is the actual line number of a keyword
+ my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
+ my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
+ my $num = $count_e - $count_b + 1;
- # OLD, not used
- # '= -' should not become =- or you will get a warning
- # about reversed -=
- # || ($tokenr eq '-')
+ # This subgroup runs from line $ib to line $ie-1, but may contain
+ # blank lines
+ if ( $num >= $Opt_size_min ) {
- # do not join a bare word with a minus, like between 'Send' and
- # '-recipients' here <<snippets/space3.in>>
- # my $msg = new Fax::Send
- # -recipients => $to,
- # -data => $data;
- # This is the safest thing to do. If we had the token to the right of
- # the minus we could do a better check.
- || $tokenr eq '-' && $typel eq 'w'
+ # if there are blank lines, we require that at least $num lines
+ # be non-blank up to the boundary with the next subgroup.
+ my $nog_b = my $nog_e = 1;
+ if ( @iblanks && !$Opt_blanks_delete ) {
+ my $j_bb = $j_b + $num - 1;
+ my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
+ $nog_b = $count_bb - $count_b + 1 == $num;
- # perl is very fussy about spaces before <<
- || $tokenr =~ /^\<\</
+ my $j_ee = $j_e - ( $num - 1 );
+ my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
+ $nog_e = $count_e - $count_ee + 1 == $num;
+ }
+ if ( $nog_b && $k > $kbeg ) {
+ $insert_blank_after->( $i_b - 1 );
+ }
+ if ( $nog_e && $k < $kend ) {
+ my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] };
+ $insert_blank_after->( $i_ep - 1 );
+ }
+ }
+ }
+ };
- # avoid combining tokens to create new meanings. Example:
- # $a+ +$b must not become $a++$b
- || ( $is_digraph{$token_joined} )
- || $is_trigraph{$token_joined}
+ my $delete_if_blank = sub {
+ my ($i) = @_;
- # another example: do not combine these two &'s:
- # allow_options & &OPT_EXECCGI
- || $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) }
+ # delete line $i if it is blank
+ return unless ( $i >= 0 && $i < @{$rlines} );
+ my $line_type = $rlines->[$i]->{_line_type};
+ return if ( $line_type ne 'CODE' );
+ my $code_type = $rlines->[$i]->{_code_type};
+ if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
+ return;
+ };
- # retain any space after possible filehandle
- # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
- || $typel eq 'Z'
+ my $delete_inner_blank_lines = sub {
- # Perl is sensitive to whitespace after the + here:
- # $b = xvals $a + 0.1 * yvals $a;
- || $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/
+ # always remove unwanted trailing blank lines from our list
+ return unless (@iblanks);
+ while ( my $ibl = pop(@iblanks) ) {
+ if ( $ibl < $iend ) { push @iblanks, $ibl; last }
+ $iend = $ibl;
+ }
- || (
- $tokenr_is_open_paren && (
+ # now mark mark interior blank lines for deletion if requested
+ return unless ($Opt_blanks_delete);
- # keep paren separate in 'use Foo::Bar ()'
- ( $typel eq 'w' && $typell eq 'k' && $tokenll eq 'use' )
+ while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
- # keep any space between filehandle and paren:
- # file mangle.t with --mangle:
- || $typel eq 'Y'
+ };
- # must have space between grep and left paren; "grep(" will fail
- || $is_sort_grep_map{$tokenl}
+ my $end_group = sub {
- # don't stick numbers next to left parens, as in:
- #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
- || $typel eq 'n'
- )
- ) ## end $tokenr_is_open_paren
+ # end a group of keywords
+ my ($bad_ending) = @_;
+ if ( defined($ibeg) && $ibeg >= 0 ) {
- # retain any space after here doc operator ( hereerr.t)
- || $typel eq 'h'
+ # then handle sufficiently large groups
+ if ( $count >= $Opt_size_min ) {
- # be careful with a space around ++ and --, to avoid ambiguity as to
- # which token it applies
- || $typer =~ /^(pp|mm)$/ && $tokenl !~ /^[\;\{\(\[]/
- || $typel =~ /^(\+\+|\-\-)$/
- && $tokenr !~ /^[\;\}\)\]]/
+ $number_of_groups_seen++;
- # need space after foreach my; for example, this will fail in
- # older versions of Perl:
- # foreach my$ft(@filetypes)...
- || (
- $tokenl eq 'my'
+ # do any blank deletions regardless of the count
+ $delete_inner_blank_lines->();
- # /^(for|foreach)$/
- && $is_for_foreach{$tokenll}
- && $tokenr =~ /^\$/
- )
+ if ( $ibeg > 0 ) {
+ my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
- # We must be sure that a space between a ? and a quoted string
- # remains if the space before the ? remains. [Loca.pm, lockarea]
- # ie,
- # $b=join $comma ? ',' : ':', @_; # ok
- # $b=join $comma?',' : ':', @_; # ok!
- # $b=join $comma ?',' : ':', @_; # error!
- # Not really required:
- ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
+ # patch for hash bang line which is not currently marked as
+ # a comment; mark it as a comment
+ if ( $ibeg == 1 && !$code_type ) {
+ my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
+ $code_type = 'BC'
+ if ( $line_text && $line_text =~ /^#/ );
+ }
- # space stacked labels (TODO: check if really necessary)
- || $typel eq 'J' && $typer eq 'J'
+ # Do not insert a blank after a comment
+ # (this could be subject to a flag in the future)
+ if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
+ if ( $Opt_blanks_before == INSERT ) {
+ $insert_blank_after->( $ibeg - 1 );
- ; # the value of this long logic sequence is the result we want
- return $result;
- }
-} ## end closure is_essential_whitespace
+ }
+ elsif ( $Opt_blanks_before == DELETE ) {
+ $delete_if_blank->( $ibeg - 1 );
+ }
+ }
+ }
-{ ## begin closure new_secret_operator_whitespace
+ # We will only put blanks before code lines. We could loosen
+ # this rule a little, but we have to be very careful because
+ # for example we certainly don't want to drop a blank line
+ # after a line like this:
+ # my $var = <<EOM;
+ if ( $line_type eq 'CODE' && defined($K_first) ) {
- my %secret_operators;
- my %is_leading_secret_token;
+ # - Do not put a blank before a line of different level
+ # - Do not put a blank line if we ended the search badly
+ # - Do not put a blank at the end of the file
+ # - Do not put a blank line before a hanging side comment
+ my $level = $rLL->[$K_first]->[_LEVEL_];
+ my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
- BEGIN {
+ if ( $level == $level_beg
+ && $ci_level == 0
+ && !$bad_ending
+ && $iend < @{$rlines}
+ && $CODE_type ne 'HSC' )
+ {
+ if ( $Opt_blanks_after == INSERT ) {
+ $insert_blank_after->($iend);
+ }
+ elsif ( $Opt_blanks_after == DELETE ) {
+ $delete_if_blank->( $iend + 1 );
+ }
+ }
+ }
+ }
+ $split_into_sub_groups->();
+ }
- # token lists for perl secret operators as compiled by Philippe Bruhat
- # at: https://metacpan.org/module/perlsecret
- %secret_operators = (
- 'Goatse' => [qw#= ( ) =#], #=( )=
- 'Venus1' => [qw#0 +#], # 0+
- 'Venus2' => [qw#+ 0#], # +0
- 'Enterprise' => [qw#) x ! !#], # ()x!!
- 'Kite1' => [qw#~ ~ <>#], # ~~<>
- 'Kite2' => [qw#~~ <>#], # ~~<>
- 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
- 'Bang bang ' => [qw#! !#], # !!
- );
+ # reset for another group
+ $ibeg = -1;
+ $iend = undef;
+ $level_beg = -1;
+ $K_closing = undef;
+ @group = ();
+ @subgroup = ();
+ @iblanks = ();
+ };
- # The following operators and constants are not included because they
- # are normally kept tight by perltidy:
- # ~~ <~>
- #
+ my $find_container_end = sub {
- # Make a lookup table indexed by the first token of each operator:
- # first token => [list, list, ...]
- foreach my $value ( values(%secret_operators) ) {
- my $tok = $value->[0];
- push @{ $is_leading_secret_token{$tok} }, $value;
+ # If the keyword lines ends with an open token, find the closing token
+ # '$K_closing' so that we can easily skip past the contents of the
+ # container.
+ return if ( $K_last <= $K_first );
+ my $KK = $K_last;
+ my $type_last = $rLL->[$KK]->[_TYPE_];
+ my $tok_last = $rLL->[$KK]->[_TOKEN_];
+ if ( $type_last eq '#' ) {
+ $KK = $self->K_previous_nonblank($KK);
+ $tok_last = $rLL->[$KK]->[_TOKEN_];
}
- }
+ if ( $KK > $K_first && $tok_last =~ /^[\(\{\[]$/ ) {
- sub new_secret_operator_whitespace {
+ my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ my $lev = $rLL->[$KK]->[_LEVEL_];
+ if ( $lev == $level_beg ) {
+ $K_closing = $K_closing_container->{$type_sequence};
+ }
+ }
+ };
- my ( $rlong_array, $rwhitespace_flags ) = @_;
+ my $add_to_group = sub {
+ my ( $i, $token, $level ) = @_;
- # Loop over all tokens in this line
- my ( $token, $type );
- my $jmax = @{$rlong_array} - 1;
- foreach my $j ( 0 .. $jmax ) {
+ # End the previous group if we have reached the maximum
+ # group size
+ if ( $Opt_size_max && @group >= $Opt_size_max ) {
+ $end_group->();
+ }
- $token = $rlong_array->[$j]->[_TOKEN_];
- $type = $rlong_array->[$j]->[_TYPE_];
+ if ( @group == 0 ) {
+ $ibeg = $i;
+ $level_beg = $level;
+ $count = 0;
+ }
- # Skip unless this token might start a secret operator
- next if ( $type eq 'b' );
- next unless ( $is_leading_secret_token{$token} );
+ $count++;
+ $iend = $i;
- # Loop over all secret operators with this leading token
- foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
- my $jend = $j - 1;
- foreach my $tok ( @{$rpattern} ) {
- $jend++;
- $jend++
+ # New sub-group?
+ if ( !@group || $token ne $group[-1]->[1] ) {
+ push @subgroup, scalar(@group);
+ }
+ push @group, [ $i, $token, $count ];
- if ( $jend <= $jmax
- && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
- if ( $jend > $jmax
- || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
- {
- $jend = undef;
- last;
- }
- }
-
- if ($jend) {
+ # remember if this line ends in an open container
+ $find_container_end->();
- # set flags to prevent spaces within this operator
- foreach my $jj ( $j + 1 .. $jend ) {
- $rwhitespace_flags->[$jj] = WS_NO;
- }
- $j = $jend;
- last;
- }
- } ## End Loop over all operators
- } ## End loop over all tokens
return;
- } # End sub
-} ## end closure new_secret_operator_whitespace
+ };
-sub tight_paren_follows {
+ ###################################
+ # loop over all lines of the source
+ ###################################
+ $end_group->();
+ my $i = -1;
+ foreach my $line_of_tokens ( @{$rlines} ) {
- my ( $self, $K_to_go_0, $K_ic ) = @_;
+ $i++;
+ last
+ if ( $Opt_repeat_count > 0
+ && $number_of_groups_seen >= $Opt_repeat_count );
- # Input parameters:
- # $K_to_go_0 = first token index K of this output batch (=K_to_go[0])
- # $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go])
- # Return parameter:
- # false if we want a break after the closing do brace
- # true if we do not want a break after the closing do brace
+ $CODE_type = "";
+ $K_first = undef;
+ $K_last = undef;
+ $line_type = $line_of_tokens->{_line_type};
- # We are at the closing brace of a 'do' block. See if this brace is
- # followed by a closing paren, and if so, set a flag which indicates
- # that we do not want a line break between the '}' and ')'.
+ # always end a group at non-CODE
+ if ( $line_type ne 'CODE' ) { $end_group->(); next }
- # xxxxx ( ...... do { ... } ) {
- # ^-------looking at this brace, K_ic
+ $CODE_type = $line_of_tokens->{_code_type};
- # Subscript notation:
- # _i = inner container (braces in this case)
- # _o = outer container (parens in this case)
- # _io = inner opening = '{'
- # _ic = inner closing = '}'
- # _oo = outer opening = '('
- # _oc = outer closing = ')'
+ # end any group at a format skipping line
+ if ( $CODE_type && $CODE_type eq 'FS' ) {
+ $end_group->();
+ next;
+ }
- # |--K_oo |--K_oc = outer container
- # xxxxx ( ...... do { ...... } ) {
- # |--K_io |--K_ic = inner container
+ # continue in a verbatim (VB) type; it may be quoted text
+ if ( $CODE_type eq 'VB' ) {
+ if ( $ibeg >= 0 ) { $iend = $i; }
+ next;
+ }
- # In general, the safe thing to do is return a 'false' value
- # if the statement appears to be complex. This will have
- # the downstream side-effect of opening up outer containers
- # to help make complex code readable. But for simpler
- # do blocks it can be preferable to keep the code compact
- # by returning a 'true' value.
+ # and continue in blank (BL) types
+ if ( $CODE_type eq 'BL' ) {
+ if ( $ibeg >= 0 ) {
+ $iend = $i;
+ push @{iblanks}, $i;
- return unless defined($K_ic);
- my $rLL = $self->[_rLL_];
+ # propagate current subgroup token
+ my $tok = $group[-1]->[1];
+ push @group, [ $i, $tok, $count ];
+ }
+ next;
+ }
- # we should only be called at a closing block
- my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_];
- return unless ($seqno_i); # shouldn't happen;
+ # examine the first token of this line
+ my $rK_range = $line_of_tokens->{_rK_range};
+ ( $K_first, $K_last ) = @{$rK_range};
+ if ( !defined($K_first) ) {
- # This only applies if the next nonblank is a ')'
- my $K_oc = $self->K_next_nonblank($K_ic);
- return unless defined($K_oc);
- my $token_next = $rLL->[$K_oc]->[_TOKEN_];
- return unless ( $token_next eq ')' );
+ # Somewhat unexpected blank line..
+ # $rK_range is normally defined for line type CODE, but this can
+ # happen for example if the input line was a single semicolon which
+ # is being deleted. In that case there was code in the input
+ # file but it is not being retained. So we can silently return.
+ return $rhash_of_desires;
+ }
- my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_];
- my $K_io = $self->[_K_opening_container_]->{$seqno_i};
- my $K_oo = $self->[_K_opening_container_]->{$seqno_o};
- return unless ( defined($K_io) && defined($K_oo) );
+ my $level = $rLL->[$K_first]->[_LEVEL_];
+ my $type = $rLL->[$K_first]->[_TYPE_];
+ my $token = $rLL->[$K_first]->[_TOKEN_];
+ my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
- # RULE 1: Do not break before a closing signature paren
- # (regardless of complexity). This is a fix for issue git#22.
- # Looking for something like:
- # sub xxx ( ... do { ... } ) {
- # ^----- next block_type
- my $K_test = $self->K_next_nonblank($K_oc);
- if ( defined($K_test) ) {
- my $block_type = $rLL->[$K_test]->[_BLOCK_TYPE_];
- if ( $block_type
- && $rLL->[$K_test]->[_TYPE_] eq '{'
- && $block_type =~ /$ANYSUB_PATTERN/ )
+ # see if this is a code type we seek (i.e. comment)
+ if ( $CODE_type
+ && $Opt_comment_pattern
+ && $CODE_type =~ /$Opt_comment_pattern/ )
{
- return 1;
- }
- }
- # RULE 2: Break if the contents within braces appears to be 'complex'. We
- # base this decision on the number of tokens between braces.
-
- # xxxxx ( ... do { ... } ) {
- # ^^^^^^
+ my $tok = $CODE_type;
- # Although very simple, it has the advantages of (1) being insensitive to
- # changes in lengths of identifier names, (2) easy to understand, implement
- # and test. A test case for this is 't/snippets/long_line.in'.
+ # Continuing a group
+ if ( $ibeg >= 0 && $level == $level_beg ) {
+ $add_to_group->( $i, $tok, $level );
+ }
- # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
- # if ( do { $2 !~ /&/ } ) { ... }
+ # Start new group
+ else {
- # Example: $K_ic - $K_oo = 10 [Pass Rule 2]
- # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
+ # first end old group if any; we might be starting new
+ # keywords at different level
+ if ( $ibeg > 0 ) { $end_group->(); }
+ $add_to_group->( $i, $tok, $level );
+ }
+ next;
+ }
- # Example: $K_ic - $K_oo = 20 [Fail Rule 2]
- # test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; });
+ # See if it is a keyword we seek, but never start a group in a
+ # continuation line; the code may be badly formatted.
+ if ( $ci_level == 0
+ && $type eq 'k'
+ && $token =~ /$Opt_pattern/ )
+ {
- return if ( $K_ic - $K_io > 16 );
+ # Continuing a keyword group
+ if ( $ibeg >= 0 && $level == $level_beg ) {
+ $add_to_group->( $i, $token, $level );
+ }
- # RULE 3: break if the code between the opening '(' and the '{' is 'complex'
- # As with the previous rule, we decide based on the token count
+ # Start new keyword group
+ else {
- # xxxxx ( ... do { ... } ) {
- # ^^^^^^^^
+ # first end old group if any; we might be starting new
+ # keywords at different level
+ if ( $ibeg > 0 ) { $end_group->(); }
+ $add_to_group->( $i, $token, $level );
+ }
+ next;
+ }
- # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
- # $K_io - $K_oo = 4 [Pass Rule 3]
- # if ( do { $2 !~ /&/ } ) { ... }
+ # This is not one of our keywords, but we are in a keyword group
+ # so see if we should continue or quit
+ elsif ( $ibeg >= 0 ) {
- # Example: $K_ic - $K_oo = 10 [Pass rule 2]
- # $K_io - $K_oo = 9 [Pass rule 3]
- # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
+ # - bail out on a large level change; we may have walked into a
+ # data structure or anoymous sub code.
+ if ( $level > $level_beg + 1 || $level < $level_beg ) {
+ $end_group->();
+ next;
+ }
- return if ( $K_io - $K_oo > 9 );
+ # - keep going on a continuation line of the same level, since
+ # it is probably a continuation of our previous keyword,
+ # - and keep going past hanging side comments because we never
+ # want to interrupt them.
+ if ( ( ( $level == $level_beg ) && $ci_level > 0 )
+ || $CODE_type eq 'HSC' )
+ {
+ $iend = $i;
+ next;
+ }
- # RULE 4: Break if we have already broken this batch of output tokens
- return if ( $K_oo < $K_to_go_0 );
+ # - continue if if we are within in a container which started with
+ # the line of the previous keyword.
+ if ( defined($K_closing) && $K_first <= $K_closing ) {
- # RULE 5: Break if input is not on one line
- # For example, we will set the flag for the following expression
- # written in one line:
+ # continue if entire line is within container
+ if ( $K_last <= $K_closing ) { $iend = $i; next }
- # This has: $K_ic - $K_oo = 10 [Pass rule 2]
- # $K_io - $K_oo = 8 [Pass rule 3]
- # $self->debug( 'Error: ' . do { local $/; <$err> } );
+ # continue at ); or }; or ];
+ my $KK = $K_closing + 1;
+ if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
+ if ( $KK < $K_last ) {
+ if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
+ if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) {
+ $end_group->(1);
+ next;
+ }
+ }
+ $iend = $i;
+ next;
+ }
- # but we break after the brace if it is on multiple lines on input, since
- # the user may prefer it on multiple lines:
+ $end_group->(1);
+ next;
+ }
- # [Fail rule 5]
- # $self->debug(
- # 'Error: ' . do { local $/; <$err> }
- # );
+ # - end the group if none of the above
+ $end_group->();
+ next;
+ }
- if ( !$rOpts->{'ignore-old-breakpoints'} ) {
- my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_];
- my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_];
- return if ( $iline_oo != $iline_oc );
+ # not in a keyword group; continue
+ else { next }
}
- # OK to keep the paren tight
- return 1;
-}
+ # end of loop over all lines
+ $end_group->();
+ return $rhash_of_desires;
-sub copy_token_as_type {
+} ## end sub keyword_group_scan
- # This provides a quick way to create a new token by
- # slightly modifying an existing token.
- my ( $rold_token, $type, $token ) = @_;
- if ( $type eq 'b' ) {
- $token = " " unless defined($token);
- }
- elsif ( $type eq 'q' ) {
- $token = '' unless defined($token);
- }
- elsif ( $type eq '->' ) {
- $token = '->' unless defined($token);
- }
- elsif ( $type eq ';' ) {
- $token = ';' unless defined($token);
- }
- else {
- Fault(
-"Programming error: copy_token_as has type $type but should be 'b' or 'q'"
- );
- }
+#######################################
+# CODE SECTION 7: Process lines of code
+#######################################
- my $rnew_token = [ map { $_ } @{$rold_token} ];
- $rnew_token->[_TYPE_] = $type;
- $rnew_token->[_TOKEN_] = $token;
- $rnew_token->[_BLOCK_TYPE_] = '';
- $rnew_token->[_CONTAINER_TYPE_] = '';
- $rnew_token->[_CONTAINER_ENVIRONMENT_] = '';
- $rnew_token->[_TYPE_SEQUENCE_] = '';
- return $rnew_token;
+sub prepare_for_next_batch {
+ initialize_forced_breakpoint_vars();
+ initialize_gnu_batch_vars();
+ initialize_batch_variables();
+ return;
}
{ ## begin closure process_line_of_CODE
} ## end sub process_line_of_CODE
} ## end closure process_line_of_CODE
-sub consecutive_nonblank_lines {
- my ($self) = @_;
- my $file_writer_object = $self->[_file_writer_object_];
- my $vao = $self->[_vertical_aligner_object_];
- return $file_writer_object->get_consecutive_nonblank_lines() +
- $vao->get_cached_line_count();
-}
+sub tight_paren_follows {
-{ ## begin closure grind_batch_of_CODE
+ my ( $self, $K_to_go_0, $K_ic ) = @_;
- # The routines in this closure begin the processing of a 'batch' of code.
+ # Input parameters:
+ # $K_to_go_0 = first token index K of this output batch (=K_to_go[0])
+ # $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go])
+ # Return parameter:
+ # false if we want a break after the closing do brace
+ # true if we do not want a break after the closing do brace
- # A variable to keep track of consecutive nonblank lines so that we can
- # insert occasional blanks
- my @nonblank_lines_at_depth;
+ # We are at the closing brace of a 'do' block. See if this brace is
+ # followed by a closing paren, and if so, set a flag which indicates
+ # that we do not want a line break between the '}' and ')'.
- # A variable to remember maximum size of previous batches; this is needed
- # by the logical padding routine
- my $peak_batch_size;
- my $batch_count;
+ # xxxxx ( ...... do { ... } ) {
+ # ^-------looking at this brace, K_ic
- sub initialize_grind_batch_of_CODE {
- @nonblank_lines_at_depth = ();
- $peak_batch_size = 0;
- $batch_count = 0;
- return;
- }
+ # Subscript notation:
+ # _i = inner container (braces in this case)
+ # _o = outer container (parens in this case)
+ # _io = inner opening = '{'
+ # _ic = inner closing = '}'
+ # _oo = outer opening = '('
+ # _oc = outer closing = ')'
- # sub grind_batch_of_CODE receives sections of code which are the longest
- # possible lines without a break. In other words, it receives what is left
- # after applying all breaks forced by blank lines, block comments, side
- # comments, pod text, and structural braces. Its job is to break this code
- # down into smaller pieces, if necessary, which fit within the maximum
- # allowed line length. Then it sends the resulting lines of code on down
- # the pipeline to the VerticalAligner package, breaking the code into
- # continuation lines as necessary. The batch of tokens are in the "to_go"
- # arrays. The name 'grind' is slightly suggestive of breaking down the
- # long lines, but mainly it is easy to remember and find with an editor
- # search.
+ # |--K_oo |--K_oc = outer container
+ # xxxxx ( ...... do { ...... } ) {
+ # |--K_io |--K_ic = inner container
- # The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work
- # together in the following way:
+ # In general, the safe thing to do is return a 'false' value
+ # if the statement appears to be complex. This will have
+ # the downstream side-effect of opening up outer containers
+ # to help make complex code readable. But for simpler
+ # do blocks it can be preferable to keep the code compact
+ # by returning a 'true' value.
- # - 'process_line_of_CODE' receives the original INPUT lines one-by-one and
- # combines them into the largest sequences of tokens which might form a new
- # line.
- # - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT
- # lines.
+ return unless defined($K_ic);
+ my $rLL = $self->[_rLL_];
- # So sub 'process_line_of_CODE' builds up the longest possible continouus
- # sequences of tokens, regardless of line length, and then
- # grind_batch_of_CODE breaks these sequences back down into the new output
- # lines.
+ # we should only be called at a closing block
+ my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_];
+ return unless ($seqno_i); # shouldn't happen;
- # Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner.
+ # This only applies if the next nonblank is a ')'
+ my $K_oc = $self->K_next_nonblank($K_ic);
+ return unless defined($K_oc);
+ my $token_next = $rLL->[$K_oc]->[_TOKEN_];
+ return unless ( $token_next eq ')' );
- my $DEBUG_GRIND;
+ my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_];
+ my $K_io = $self->[_K_opening_container_]->{$seqno_i};
+ my $K_oo = $self->[_K_opening_container_]->{$seqno_o};
+ return unless ( defined($K_io) && defined($K_oo) );
- sub grind_batch_of_CODE {
+ # RULE 1: Do not break before a closing signature paren
+ # (regardless of complexity). This is a fix for issue git#22.
+ # Looking for something like:
+ # sub xxx ( ... do { ... } ) {
+ # ^----- next block_type
+ my $K_test = $self->K_next_nonblank($K_oc);
+ if ( defined($K_test) ) {
+ my $block_type = $rLL->[$K_test]->[_BLOCK_TYPE_];
+ if ( $block_type
+ && $rLL->[$K_test]->[_TYPE_] eq '{'
+ && $block_type =~ /$ANYSUB_PATTERN/ )
+ {
+ return 1;
+ }
+ }
- my ($self) = @_;
- my $file_writer_object = $self->[_file_writer_object_];
+ # RULE 2: Break if the contents within braces appears to be 'complex'. We
+ # base this decision on the number of tokens between braces.
- my $this_batch = $self->[_this_batch_];
- $batch_count++;
+ # xxxxx ( ... do { ... } ) {
+ # ^^^^^^
- my $comma_count_in_batch = $this_batch->[_comma_count_in_batch_];
- my $starting_in_quote = $this_batch->[_starting_in_quote_];
- my $ending_in_quote = $this_batch->[_ending_in_quote_];
- my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
- my $rK_to_go = $this_batch->[_rK_to_go_];
+ # Although very simple, it has the advantages of (1) being insensitive to
+ # changes in lengths of identifier names, (2) easy to understand, implement
+ # and test. A test case for this is 't/snippets/long_line.in'.
- my $rLL = $self->[_rLL_];
+ # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
+ # if ( do { $2 !~ /&/ } ) { ... }
- my $rOpts_add_newlines = $rOpts->{'add-newlines'};
- my $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
- my $rOpts_maximum_fields_per_table =
- $rOpts->{'maximum-fields-per-table'};
- my $rOpts_one_line_block_semicolons =
- $rOpts->{'one-line-block-semicolons'};
+ # Example: $K_ic - $K_oo = 10 [Pass Rule 2]
+ # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
- # This routine is only called from sub flush_batch_of_code, so that
- # routine is a better spot for debugging.
- $DEBUG_GRIND && do {
- my $token = my $type = "";
- if ( $max_index_to_go >= 0 ) {
- $token = $tokens_to_go[$max_index_to_go];
- $type = $types_to_go[$max_index_to_go];
- }
- my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
- print STDERR <<EOM;
-grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text:
-$output_str
-EOM
- };
+ # Example: $K_ic - $K_oo = 20 [Fail Rule 2]
+ # test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; });
- my $comma_arrow_count_contained =
- $self->match_opening_and_closing_tokens();
+ return if ( $K_ic - $K_io > 16 );
- # tell the -lp option we are outputting a batch so it can close
- # any unfinished items in its stack
- finish_lp_batch();
+ # RULE 3: break if the code between the opening '(' and the '{' is 'complex'
+ # As with the previous rule, we decide based on the token count
- # If this line ends in a code block brace, set breaks at any
- # previous closing code block braces to breakup a chain of code
- # blocks on one line. This is very rare but can happen for
- # user-defined subs. For example we might be looking at this:
- # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
- my $saw_good_break = 0; # flag to force breaks even if short line
- if (
+ # xxxxx ( ... do { ... } ) {
+ # ^^^^^^^^
- # looking for opening or closing block brace
- $block_type_to_go[$max_index_to_go]
+ # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
+ # $K_io - $K_oo = 4 [Pass Rule 3]
+ # if ( do { $2 !~ /&/ } ) { ... }
- # but not one of these which are never duplicated on a line:
- # until|while|for|if|elsif|else
- && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go]
- }
- )
- {
- my $lev = $nesting_depth_to_go[$max_index_to_go];
+ # Example: $K_ic - $K_oo = 10 [Pass rule 2]
+ # $K_io - $K_oo = 9 [Pass rule 3]
+ # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
- # Walk backwards from the end and
- # set break at any closing block braces at the same level.
- # But quit if we are not in a chain of blocks.
- for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
- last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
- next if ( $levels_to_go[$i] > $lev ); # skip past higher level
+ return if ( $K_io - $K_oo > 9 );
- if ( $block_type_to_go[$i] ) {
- if ( $tokens_to_go[$i] eq '}' ) {
- $self->set_forced_breakpoint($i);
- $saw_good_break = 1;
- }
- }
+ # RULE 4: Break if we have already broken this batch of output tokens
+ return if ( $K_oo < $K_to_go_0 );
- # quit if we see anything besides words, function, blanks
- # at this level
- elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
- }
- }
+ # RULE 5: Break if input is not on one line
+ # For example, we will set the flag for the following expression
+ # written in one line:
- my $imin = 0;
- my $imax = $max_index_to_go;
+ # This has: $K_ic - $K_oo = 10 [Pass rule 2]
+ # $K_io - $K_oo = 8 [Pass rule 3]
+ # $self->debug( 'Error: ' . do { local $/; <$err> } );
- # trim any blank tokens
- if ( $max_index_to_go >= 0 ) {
- if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
- if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
- }
+ # but we break after the brace if it is on multiple lines on input, since
+ # the user may prefer it on multiple lines:
- # anything left to write?
- if ( $imin <= $imax ) {
+ # [Fail rule 5]
+ # $self->debug(
+ # 'Error: ' . do { local $/; <$err> }
+ # );
- my $last_line_leading_type = $self->[_last_line_leading_type_];
- my $last_line_leading_level = $self->[_last_line_leading_level_];
- my $last_last_line_leading_level =
- $self->[_last_last_line_leading_level_];
+ if ( !$rOpts->{'ignore-old-breakpoints'} ) {
+ my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_];
+ my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_];
+ return if ( $iline_oo != $iline_oc );
+ }
- # add a blank line before certain key types but not after a comment
- if ( $last_line_leading_type !~ /^[#]/ ) {
- my $want_blank = 0;
- my $leading_token = $tokens_to_go[$imin];
- my $leading_type = $types_to_go[$imin];
+ # OK to keep the paren tight
+ return 1;
+}
- # blank lines before subs except declarations and one-liners
- if ( $leading_type eq 'i' && $leading_token =~ /$SUB_PATTERN/ )
- {
- $want_blank = $rOpts->{'blank-lines-before-subs'}
- if (
- $self->terminal_type_i( $imin, $imax ) !~ /^[\;\}]$/ );
- }
+sub starting_one_line_block {
- # break before all package declarations
- elsif ($leading_token =~ /^(package\s)/
- && $leading_type eq 'i' )
- {
- $want_blank = $rOpts->{'blank-lines-before-packages'};
- }
+ # after seeing an opening curly brace, look for the closing brace and see
+ # if the entire block will fit on a line. This routine is not always right
+ # so a check is made later (at the closing brace) to make sure we really
+ # have a one-line block. We have to do this preliminary check, though,
+ # because otherwise we would always break at a semicolon within a one-line
+ # block if the block contains multiple statements.
- # break before certain key blocks except one-liners
- if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' )
- {
- $want_blank = $rOpts->{'blank-lines-before-subs'}
- if ( $self->terminal_type_i( $imin, $imax ) ne '}' );
- }
+ my ( $self, $Kj, $K_last_nonblank, $K_last, $level, $slevel, $ci_level ) =
+ @_;
- # Break before certain block types if we haven't had a
- # break at this level for a while. This is the
- # difficult decision..
- elsif ($leading_type eq 'k'
- && $last_line_leading_type ne 'b'
- && $leading_token =~
- /^(unless|if|while|until|for|foreach)$/ )
- {
- my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
- if ( !defined($lc) ) { $lc = 0 }
+ my $rbreak_container = $self->[_rbreak_container_];
+ my $rshort_nested = $self->[_rshort_nested_];
+ my $rLL = $self->[_rLL_];
- # patch for RT #128216: no blank line inserted at a level
- # change
- if ( $levels_to_go[$imin] != $last_line_leading_level ) {
- $lc = 0;
- }
+ # kill any current block - we can only go 1 deep
+ destroy_one_line_block();
- $want_blank =
- $rOpts->{'blanks-before-blocks'}
- && $lc >= $rOpts->{'long-block-line-count'}
- && $self->consecutive_nonblank_lines() >=
- $rOpts->{'long-block-line-count'}
- && $self->terminal_type_i( $imin, $imax ) ne '}';
- }
+ # return value:
+ # 1=distance from start of block to opening brace exceeds line length
+ # 0=otherwise
- # Check for blank lines wanted before a closing brace
- if ( $leading_token eq '}' ) {
- if ( $rOpts->{'blank-lines-before-closing-block'}
- && $block_type_to_go[$imin]
- && $block_type_to_go[$imin] =~
- /$blank_lines_before_closing_block_pattern/ )
- {
- my $nblanks =
- $rOpts->{'blank-lines-before-closing-block'};
- if ( $nblanks > $want_blank ) {
- $want_blank = $nblanks;
- }
- }
- }
+ my $i_start = 0;
- if ($want_blank) {
+ # shouldn't happen: there must have been a prior call to
+ # store_token_to_go to put the opening brace in the output stream
+ if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) {
+ Fault("program bug: store_token_to_go called incorrectly\n");
+ }
- # future: send blank line down normal path to VerticalAligner
- $self->flush_vertical_aligner();
- $file_writer_object->require_blank_code_lines($want_blank);
- }
- }
+ # return if block should be broken
+ my $type_sequence = $rLL->[$Kj]->[_TYPE_SEQUENCE_];
+ if ( $rbreak_container->{$type_sequence} ) {
+ return 0;
+ }
- # update blank line variables and count number of consecutive
- # non-blank, non-comment lines at this level
- $last_last_line_leading_level = $last_line_leading_level;
- $last_line_leading_level = $levels_to_go[$imin];
- if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
- $last_line_leading_type = $types_to_go[$imin];
- if ( $last_line_leading_level == $last_last_line_leading_level
- && $last_line_leading_type ne 'b'
- && $last_line_leading_type ne '#'
- && defined( $nonblank_lines_at_depth[$last_line_leading_level] )
- )
- {
- $nonblank_lines_at_depth[$last_line_leading_level]++;
- }
- else {
- $nonblank_lines_at_depth[$last_line_leading_level] = 1;
- }
+ my $block_type = $rLL->[$Kj]->[_BLOCK_TYPE_];
+ my $index_max_forced_break = get_index_max_forced_break();
- $self->[_last_line_leading_type_] = $last_line_leading_type;
- $self->[_last_line_leading_level_] = $last_line_leading_level;
- $self->[_last_last_line_leading_level_] =
- $last_last_line_leading_level;
+ my $previous_nonblank_token = '';
+ my $i_last_nonblank = -1;
+ if ( defined($K_last_nonblank) ) {
+ $i_last_nonblank = $K_last_nonblank - $K_to_go[0];
+ if ( $i_last_nonblank >= 0 ) {
+ $previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
+ }
+ }
- # add a couple of extra terminal blank tokens
- $self->pad_array_to_go();
+ # find the starting keyword for this block (such as 'if', 'else', ...)
+ if ( $max_index_to_go == 0
+ || $block_type =~ /^[\{\}\;\:]$/
+ || $block_type =~ /^package/ )
+ {
+ $i_start = $max_index_to_go;
+ }
- # set all forced breakpoints for good list formatting
- my $is_long_line =
- $self->excess_line_length( $imin, $max_index_to_go ) > 0;
+ # the previous nonblank token should start these block types
+ elsif (
+ $i_last_nonblank >= 0
+ && ( $previous_nonblank_token eq $block_type
+ || $block_type =~ /$ANYSUB_PATTERN/
+ || $block_type =~ /\(\)/ )
+ )
+ {
+ $i_start = $i_last_nonblank;
- my $old_line_count_in_batch =
- $self->get_old_line_count( $K_to_go[0],
- $K_to_go[$max_index_to_go] );
+ # For signatures and extended syntax ...
+ # If this brace follows a parenthesized list, we should look back to
+ # find the keyword before the opening paren because otherwise we might
+ # form a one line block which stays intack, and cause the parenthesized
+ # expression to break open. That looks bad. However, actually
+ # searching for the opening paren is slow and tedius.
+ # The actual keyword is often at the start of a line, but might not be.
+ # For example, we might have an anonymous sub with signature list
+ # following a =>. It is safe to mark the start anywhere before the
+ # opening paren, so we just go back to the prevoious break (or start of
+ # the line) if that is before the opening paren. The minor downside is
+ # that we may very occasionally break open a block unnecessarily.
+ if ( $tokens_to_go[$i_start] eq ')' ) {
+ $i_start = $index_max_forced_break + 1;
+ if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; }
+ my $lev = $levels_to_go[$i_start];
+ if ( $lev > $level ) { return 0 }
+ }
+ }
- if (
- $is_long_line
- || $old_line_count_in_batch > 1
+ elsif ( $previous_nonblank_token eq ')' ) {
- # must always call scan_list() with unbalanced batches because it
- # is maintaining some stacks
- || is_unbalanced_batch()
+ # For something like "if (xxx) {", the keyword "if" will be
+ # just after the most recent break. This will be 0 unless
+ # we have just killed a one-line block and are starting another.
+ # (doif.t)
+ # Note: cannot use inext_index_to_go[] here because that array
+ # is still being constructed.
+ $i_start = $index_max_forced_break + 1;
+ if ( $types_to_go[$i_start] eq 'b' ) {
+ $i_start++;
+ }
- # call scan_list if we might want to break at commas
- || (
- $comma_count_in_batch
- && ( $rOpts_maximum_fields_per_table > 0
- || $rOpts_comma_arrow_breakpoints == 0 )
- )
+ # Patch to avoid breaking short blocks defined with extended_syntax:
+ # Strip off any trailing () which was added in the parser to mark
+ # the opening keyword. For example, in the following
+ # create( TypeFoo $e) {$bubba}
+ # the blocktype would be marked as create()
+ my $stripped_block_type = $block_type;
+ $stripped_block_type =~ s/\(\)$//;
- # call scan_list if user may want to break open some one-line
- # hash references
- || ( $comma_arrow_count_contained
- && $rOpts_comma_arrow_breakpoints != 3 )
- )
- {
- ## This caused problems in one version of perl for unknown reasons:
- ## $saw_good_break ||= scan_list();
- my $sgb = $self->scan_list();
- $saw_good_break ||= $sgb;
- }
+ unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
+ return 0;
+ }
+ }
- # let $ri_first and $ri_last be references to lists of
- # first and last tokens of line fragments to output..
- my ( $ri_first, $ri_last );
+ # patch for SWITCH/CASE to retain one-line case/when blocks
+ elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
- # write a single line if..
- if (
+ # Note: cannot use inext_index_to_go[] here because that array
+ # is still being constructed.
+ $i_start = $index_max_forced_break + 1;
+ if ( $types_to_go[$i_start] eq 'b' ) {
+ $i_start++;
+ }
+ unless ( $tokens_to_go[$i_start] eq $block_type ) {
+ return 0;
+ }
+ }
- # we aren't allowed to add any newlines
- !$rOpts_add_newlines
+ else {
+ return 1;
+ }
- # or, we don't already have an interior breakpoint
- # and we didn't see a good breakpoint
- || (
- !get_forced_breakpoint_count()
- && !$saw_good_break
+ my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
- # and this line is 'short'
- && !$is_long_line
- )
- )
- {
- @{$ri_first} = ($imin);
- @{$ri_last} = ($imax);
- }
+ # see if length is too long to even start
+ if ( $pos > maximum_line_length($i_start) ) {
+ return 1;
+ }
- # otherwise use multiple lines
- else {
+ foreach my $Ki ( $Kj + 1 .. $K_last ) {
- ( $ri_first, $ri_last, my $colon_count ) =
- $self->set_continuation_breaks($saw_good_break);
+ # old whitespace could be arbitrarily large, so don't use it
+ if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 }
+ else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] }
- $self->break_all_chain_tokens( $ri_first, $ri_last );
+ # ignore some small blocks
+ my $type_sequence = $rLL->[$Ki]->[_TYPE_SEQUENCE_];
+ my $nobreak = $rshort_nested->{$type_sequence};
- $self->break_equals( $ri_first, $ri_last );
+ # Return false result if we exceed the maximum line length,
+ if ( $pos > maximum_line_length($i_start) ) {
+ return 0;
+ }
- # now we do a correction step to clean this up a bit
- # (The only time we would not do this is for debugging)
- if ( $rOpts->{'recombine'} ) {
- ( $ri_first, $ri_last ) =
- $self->recombine_breakpoints( $ri_first, $ri_last );
- }
+ # keep going for non-containers
+ elsif ( !$type_sequence ) {
- $self->insert_final_ternary_breaks( $ri_first, $ri_last )
- if $colon_count;
- }
+ }
- $self->insert_breaks_before_list_opening_containers( $ri_first,
- $ri_last );
+ # return if we encounter another opening brace before finding the
+ # closing brace.
+ elsif ($rLL->[$Ki]->[_TOKEN_] eq '{'
+ && $rLL->[$Ki]->[_TYPE_] eq '{'
+ && $rLL->[$Ki]->[_BLOCK_TYPE_]
+ && !$nobreak )
+ {
+ return 0;
+ }
- # do corrector step if -lp option is used
- my $do_not_pad = 0;
- if ($rOpts_line_up_parentheses) {
- $do_not_pad =
- $self->correct_lp_indentation( $ri_first, $ri_last );
- }
- $self->unmask_phantom_semicolons( $ri_first, $ri_last );
- if ( $rOpts_one_line_block_semicolons == 0 ) {
- $self->delete_one_line_semicolons( $ri_first, $ri_last );
- }
+ # if we find our closing brace..
+ elsif ($rLL->[$Ki]->[_TOKEN_] eq '}'
+ && $rLL->[$Ki]->[_TYPE_] eq '}'
+ && $rLL->[$Ki]->[_BLOCK_TYPE_]
+ && !$nobreak )
+ {
- # The line breaks for this batch of code have been finalized. Now we
- # can to package the results for further processing. We will switch
- # from the local '_to_go' buffer arrays (i-index) back to the global
- # token arrays (K-index) at this point.
- my $rlines_K;
- my $index_error;
- for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
- my $ibeg = $ri_first->[$n];
- my $Kbeg = $K_to_go[$ibeg];
- my $iend = $ri_last->[$n];
- my $Kend = $K_to_go[$iend];
- if ( $iend - $ibeg != $Kend - $Kbeg ) {
- $index_error = $n unless defined($index_error);
+ # be sure any trailing comment also fits on the line
+ my $Ki_nonblank = $Ki;
+ if ( $Ki_nonblank < $K_last ) {
+ $Ki_nonblank++;
+ if ( $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b'
+ && $Ki_nonblank < $K_last )
+ {
+ $Ki_nonblank++;
}
- push @{$rlines_K},
- [ $Kbeg, $Kend, $forced_breakpoint_to_go[$iend] ];
}
- # Check correctness of the mapping between the i and K token indexes
- if ( defined($index_error) ) {
+ # Patch for one-line sort/map/grep/eval blocks with side comments:
+ # We will ignore the side comment length for sort/map/grep/eval
+ # because this can lead to statements which change every time
+ # perltidy is run. Here is an example from Denis Moskowitz which
+ # oscillates between these two states without this patch:
- # Temporary debug code - should never get here
- for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
- my $ibeg = $ri_first->[$n];
- my $Kbeg = $K_to_go[$ibeg];
- my $iend = $ri_last->[$n];
- my $Kend = $K_to_go[$iend];
- my $idiff = $iend - $ibeg;
- my $Kdiff = $Kend - $Kbeg;
- print STDERR <<EOM;
-line $n, irange $ibeg-$iend = $idiff, Krange $Kbeg-$Kend = $Kdiff;
-EOM
- }
- Fault(
- "Index error at line $index_error; i and K ranges differ");
- }
+## --------
+## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
+## @baz;
+##
+## grep {
+## $_->foo ne 'bar'
+## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
+## @baz;
+## --------
- $this_batch->[_rlines_K_] = $rlines_K;
- $this_batch->[_ibeg0_] = $ri_first->[0];
- $this_batch->[_peak_batch_size_] = $peak_batch_size;
- $this_batch->[_do_not_pad_] = $do_not_pad;
- $this_batch->[_batch_count_] = $batch_count;
+ # When the first line is input it gets broken apart by the main
+ # line break logic in sub process_line_of_CODE.
+ # When the second line is input it gets recombined by
+ # process_line_of_CODE and passed to the output routines. The
+ # output routines (set_continuation_breaks) do not break it apart
+ # because the bond strengths are set to the highest possible value
+ # for grep/map/eval/sort blocks, so the first version gets output.
+ # It would be possible to fix this by changing bond strengths,
+ # but they are high to prevent errors in older versions of perl.
- $self->send_lines_to_vertical_aligner();
+ if ( $Ki < $K_last
+ && $rLL->[$Ki_nonblank]->[_TYPE_] eq '#'
+ && !$is_sort_map_grep{$block_type} )
+ {
- # Insert any requested blank lines after an opening brace. We have
- # to skip back before any side comment to find the terminal token
- my $iterm;
- for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) {
- next if $types_to_go[$iterm] eq '#';
- next if $types_to_go[$iterm] eq 'b';
- last;
- }
+ $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
- # write requested number of blank lines after an opening block brace
- if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) {
- if ( $rOpts->{'blank-lines-after-opening-block'}
- && $block_type_to_go[$iterm]
- && $block_type_to_go[$iterm] =~
- /$blank_lines_after_opening_block_pattern/ )
- {
- my $nblanks = $rOpts->{'blank-lines-after-opening-block'};
- $self->flush_vertical_aligner();
- $file_writer_object->require_blank_code_lines($nblanks);
+ if ( $Ki_nonblank > $Ki + 1 ) {
+
+ # source whitespace could be anything, assume
+ # at least one space before the hash on output
+ if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
+ $pos += 1;
+ }
+ else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
+ }
+
+ if ( $pos >= maximum_line_length($i_start) ) {
+ return 0;
}
}
- }
- # Remember the largest batch size processed. This is needed by the
- # logical padding routine to avoid padding the first nonblank token
- if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
- $peak_batch_size = $max_index_to_go;
+ # ok, it's a one-line block
+ create_one_line_block( $i_start, 20 );
+ return 0;
}
- return;
+ # just keep going for other characters
+ else {
+ }
}
-} ## end closure grind_batch_of_CODE
-sub note_added_semicolon {
- my ( $self, $line_number ) = @_;
- $self->[_last_added_semicolon_at_] = $line_number;
- if ( $self->[_added_semicolon_count_] == 0 ) {
- $self->[_first_added_semicolon_at_] = $line_number;
+ # Allow certain types of new one-line blocks to form by joining
+ # input lines. These can be safely done, but for other block types,
+ # we keep old one-line blocks but do not form new ones. It is not
+ # always a good idea to make as many one-line blocks as possible,
+ # so other types are not done. The user can always use -mangle.
+ if ( $want_one_line_block{$block_type} ) {
+ create_one_line_block( $i_start, 1 );
}
- $self->[_added_semicolon_count_]++;
- write_logfile_entry("Added ';' here\n");
- return;
+ return 0;
}
-sub note_deleted_semicolon {
- my ( $self, $line_number ) = @_;
- $self->[_last_deleted_semicolon_at_] = $line_number;
- if ( $self->[_deleted_semicolon_count_] == 0 ) {
- $self->[_first_deleted_semicolon_at_] = $line_number;
- }
- $self->[_deleted_semicolon_count_]++;
- write_logfile_entry("Deleted unnecessary ';' at line $line_number\n");
- return;
-}
+sub unstore_token_to_go {
-sub note_embedded_tab {
- my ( $self, $line_number ) = @_;
- $self->[_embedded_tab_count_]++;
- $self->[_last_embedded_tab_at_] = $line_number;
- if ( !$self->[_first_embedded_tab_at_] ) {
- $self->[_first_embedded_tab_at_] = $line_number;
+ # remove most recent token from output stream
+ my $self = shift;
+ if ( $max_index_to_go > 0 ) {
+ $max_index_to_go--;
}
-
- if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) {
- write_logfile_entry("Embedded tabs in quote or pattern\n");
+ else {
+ $max_index_to_go = UNDEFINED_INDEX;
}
return;
}
-sub starting_one_line_block {
+sub compare_indentation_levels {
- # after seeing an opening curly brace, look for the closing brace and see
- # if the entire block will fit on a line. This routine is not always right
- # so a check is made later (at the closing brace) to make sure we really
- # have a one-line block. We have to do this preliminary check, though,
- # because otherwise we would always break at a semicolon within a one-line
- # block if the block contains multiple statements.
+ # Check to see if output line tabbing agrees with input line
+ # this can be very useful for debugging a script which has an extra
+ # or missing brace.
- my ( $self, $Kj, $K_last_nonblank, $K_last, $level, $slevel, $ci_level ) =
- @_;
+ my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_;
+ return unless ( defined($K_first) );
- my $rbreak_container = $self->[_rbreak_container_];
- my $rshort_nested = $self->[_rshort_nested_];
- my $rLL = $self->[_rLL_];
+ my $rLL = $self->[_rLL_];
- # kill any current block - we can only go 1 deep
- destroy_one_line_block();
+ my $structural_indentation_level = $rLL->[$K_first]->[_LEVEL_];
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
+ $structural_indentation_level = $radjusted_levels->[$K_first];
+ }
- # return value:
- # 1=distance from start of block to opening brace exceeds line length
- # 0=otherwise
+ my $is_closing_block = $rLL->[$K_first]->[_TYPE_] eq '}'
+ && $rLL->[$K_first]->[_BLOCK_TYPE_];
- my $i_start = 0;
+ if ( $guessed_indentation_level ne $structural_indentation_level ) {
+ $self->[_last_tabbing_disagreement_] = $line_number;
- # shouldn't happen: there must have been a prior call to
- # store_token_to_go to put the opening brace in the output stream
- if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) {
- Fault("program bug: store_token_to_go called incorrectly\n");
- }
+ if ($is_closing_block) {
- # return if block should be broken
- my $type_sequence = $rLL->[$Kj]->[_TYPE_SEQUENCE_];
- if ( $rbreak_container->{$type_sequence} ) {
- return 0;
- }
+ if ( !$self->[_in_brace_tabbing_disagreement_] ) {
+ $self->[_in_brace_tabbing_disagreement_] = $line_number;
+ }
+ if ( !$self->[_first_brace_tabbing_disagreement_] ) {
+ $self->[_first_brace_tabbing_disagreement_] = $line_number;
+ }
- my $block_type = $rLL->[$Kj]->[_BLOCK_TYPE_];
- my $index_max_forced_break = get_index_max_forced_break();
+ }
- my $previous_nonblank_token = '';
- my $i_last_nonblank = -1;
- if ( defined($K_last_nonblank) ) {
- $i_last_nonblank = $K_last_nonblank - $K_to_go[0];
- if ( $i_last_nonblank >= 0 ) {
- $previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
+ if ( !$self->[_in_tabbing_disagreement_] ) {
+ $self->[_tabbing_disagreement_count_]++;
+
+ if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
+ write_logfile_entry(
+"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
+ );
+ }
+ $self->[_in_tabbing_disagreement_] = $line_number;
+ $self->[_first_tabbing_disagreement_] = $line_number
+ unless ( $self->[_first_tabbing_disagreement_] );
}
}
+ else {
- # find the starting keyword for this block (such as 'if', 'else', ...)
- if ( $max_index_to_go == 0
- || $block_type =~ /^[\{\}\;\:]$/
- || $block_type =~ /^package/ )
- {
- $i_start = $max_index_to_go;
- }
+ $self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block);
- # the previous nonblank token should start these block types
- elsif (
- $i_last_nonblank >= 0
- && ( $previous_nonblank_token eq $block_type
- || $block_type =~ /$ANYSUB_PATTERN/
- || $block_type =~ /\(\)/ )
- )
- {
- $i_start = $i_last_nonblank;
+ my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
+ if ($in_tabbing_disagreement) {
+
+ if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
+ write_logfile_entry(
+"End indentation disagreement from input line $in_tabbing_disagreement\n"
+ );
+
+ if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES )
+ {
+ write_logfile_entry(
+ "No further tabbing disagreements will be noted\n");
+ }
+ }
+ $self->[_in_tabbing_disagreement_] = 0;
- # For signatures and extended syntax ...
- # If this brace follows a parenthesized list, we should look back to
- # find the keyword before the opening paren because otherwise we might
- # form a one line block which stays intack, and cause the parenthesized
- # expression to break open. That looks bad. However, actually
- # searching for the opening paren is slow and tedius.
- # The actual keyword is often at the start of a line, but might not be.
- # For example, we might have an anonymous sub with signature list
- # following a =>. It is safe to mark the start anywhere before the
- # opening paren, so we just go back to the prevoious break (or start of
- # the line) if that is before the opening paren. The minor downside is
- # that we may very occasionally break open a block unnecessarily.
- if ( $tokens_to_go[$i_start] eq ')' ) {
- $i_start = $index_max_forced_break + 1;
- if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; }
- my $lev = $levels_to_go[$i_start];
- if ( $lev > $level ) { return 0 }
}
}
+ return;
+}
- elsif ( $previous_nonblank_token eq ')' ) {
+###################################################
+# CODE SECTION 8: Utilities for setting breakpoints
+###################################################
- # For something like "if (xxx) {", the keyword "if" will be
- # just after the most recent break. This will be 0 unless
- # we have just killed a one-line block and are starting another.
- # (doif.t)
- # Note: cannot use inext_index_to_go[] here because that array
- # is still being constructed.
- $i_start = $index_max_forced_break + 1;
- if ( $types_to_go[$i_start] eq 'b' ) {
- $i_start++;
- }
+{ ## begin closure set_forced_breakpoint
- # Patch to avoid breaking short blocks defined with extended_syntax:
- # Strip off any trailing () which was added in the parser to mark
- # the opening keyword. For example, in the following
- # create( TypeFoo $e) {$bubba}
- # the blocktype would be marked as create()
- my $stripped_block_type = $block_type;
- $stripped_block_type =~ s/\(\)$//;
+ my $forced_breakpoint_count;
+ my $forced_breakpoint_undo_count;
+ my @forced_breakpoint_undo_stack;
+ my $index_max_forced_break;
- unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
- return 0;
- }
+ sub initialize_forced_breakpoint_vars {
+ $forced_breakpoint_count = 0;
+ $index_max_forced_break = UNDEFINED_INDEX;
+ $forced_breakpoint_undo_count = 0;
+ @forced_breakpoint_undo_stack = ();
+ return;
}
- # patch for SWITCH/CASE to retain one-line case/when blocks
- elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
+ sub get_forced_breakpoint_count {
+ return $forced_breakpoint_count;
+ }
- # Note: cannot use inext_index_to_go[] here because that array
- # is still being constructed.
- $i_start = $index_max_forced_break + 1;
- if ( $types_to_go[$i_start] eq 'b' ) {
- $i_start++;
- }
- unless ( $tokens_to_go[$i_start] eq $block_type ) {
- return 0;
- }
+ sub get_forced_breakpoint_undo_count {
+ return $forced_breakpoint_undo_count;
}
- else {
- return 1;
+ sub get_index_max_forced_break {
+ return $index_max_forced_break;
}
- my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
+ sub set_fake_breakpoint {
- # see if length is too long to even start
- if ( $pos > maximum_line_length($i_start) ) {
- return 1;
+ # Just bump up the breakpoint count as a signal that there are breaks.
+ # This is useful if we have breaks but may want to postpone deciding where
+ # to make them.
+ $forced_breakpoint_count++;
+ return;
}
- foreach my $Ki ( $Kj + 1 .. $K_last ) {
+ my $DEBUG_FORCE;
- # old whitespace could be arbitrarily large, so don't use it
- if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 }
- else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] }
+ sub set_forced_breakpoint {
+ my ( $self, $i ) = @_;
- # ignore some small blocks
- my $type_sequence = $rLL->[$Ki]->[_TYPE_SEQUENCE_];
- my $nobreak = $rshort_nested->{$type_sequence};
+ return unless defined $i && $i >= 0;
- # Return false result if we exceed the maximum line length,
- if ( $pos > maximum_line_length($i_start) ) {
- return 0;
- }
+ # no breaks between welded tokens
+ return if ( $self->weld_len_right_to_go($i) );
- # keep going for non-containers
- elsif ( !$type_sequence ) {
+ # when called with certain tokens, use bond strengths to decide
+ # if we break before or after it
+ my $token = $tokens_to_go[$i];
+ if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
+ if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
}
- # return if we encounter another opening brace before finding the
- # closing brace.
- elsif ($rLL->[$Ki]->[_TOKEN_] eq '{'
- && $rLL->[$Ki]->[_TYPE_] eq '{'
- && $rLL->[$Ki]->[_BLOCK_TYPE_]
- && !$nobreak )
- {
- return 0;
- }
+ # breaks are forced before 'if' and 'unless'
+ elsif ( $is_if_unless{$token} ) { $i-- }
- # if we find our closing brace..
- elsif ($rLL->[$Ki]->[_TOKEN_] eq '}'
- && $rLL->[$Ki]->[_TYPE_] eq '}'
- && $rLL->[$Ki]->[_BLOCK_TYPE_]
- && !$nobreak )
- {
+ if ( $i >= 0 && $i <= $max_index_to_go ) {
+ my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
- # be sure any trailing comment also fits on the line
- my $Ki_nonblank = $Ki;
- if ( $Ki_nonblank < $K_last ) {
- $Ki_nonblank++;
- if ( $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b'
- && $Ki_nonblank < $K_last )
- {
- $Ki_nonblank++;
+ $DEBUG_FORCE && do {
+ my ( $a, $b, $c ) = caller();
+ print STDOUT
+"FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
+ };
+
+ ######################################################################
+ # NOTE: if we call set_closing_breakpoint below it will then call this
+ # routing back. So there is the possibility of an infinite loop if a
+ # programming error is made. As a precaution, I have added a check on
+ # the forced_breakpoint flag, so that we won't keep trying to set it.
+ # That will give additional protection against a loop.
+ ######################################################################
+ if ( $i_nonblank >= 0
+ && $nobreak_to_go[$i_nonblank] == 0
+ && !$forced_breakpoint_to_go[$i_nonblank] )
+ {
+ $forced_breakpoint_to_go[$i_nonblank] = 1;
+
+ if ( $i_nonblank > $index_max_forced_break ) {
+ $index_max_forced_break = $i_nonblank;
+ }
+ $forced_breakpoint_count++;
+ $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ]
+ = $i_nonblank;
+
+ # if we break at an opening container..break at the closing
+ if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
+ $self->set_closing_breakpoint($i_nonblank);
}
}
+ }
+ return;
+ }
- # Patch for one-line sort/map/grep/eval blocks with side comments:
- # We will ignore the side comment length for sort/map/grep/eval
- # because this can lead to statements which change every time
- # perltidy is run. Here is an example from Denis Moskowitz which
- # oscillates between these two states without this patch:
+ sub clear_breakpoint_undo_stack {
+ my ($self) = @_;
+ $forced_breakpoint_undo_count = 0;
+ return;
+ }
-## --------
-## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
-## @baz;
-##
-## grep {
-## $_->foo ne 'bar'
-## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
-## @baz;
-## --------
-
- # When the first line is input it gets broken apart by the main
- # line break logic in sub process_line_of_CODE.
- # When the second line is input it gets recombined by
- # process_line_of_CODE and passed to the output routines. The
- # output routines (set_continuation_breaks) do not break it apart
- # because the bond strengths are set to the highest possible value
- # for grep/map/eval/sort blocks, so the first version gets output.
- # It would be possible to fix this by changing bond strengths,
- # but they are high to prevent errors in older versions of perl.
-
- if ( $Ki < $K_last
- && $rLL->[$Ki_nonblank]->[_TYPE_] eq '#'
- && !$is_sort_map_grep{$block_type} )
- {
+ my $DEBUG_UNDOBP;
- $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
+ sub undo_forced_breakpoint_stack {
- if ( $Ki_nonblank > $Ki + 1 ) {
+ my ( $self, $i_start ) = @_;
+ if ( $i_start < 0 ) {
+ $i_start = 0;
+ my ( $a, $b, $c ) = caller();
+ warning(
+"Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
+ );
+ }
- # source whitespace could be anything, assume
- # at least one space before the hash on output
- if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
- $pos += 1;
- }
- else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
- }
+ while ( $forced_breakpoint_undo_count > $i_start ) {
+ my $i =
+ $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
+ if ( $i >= 0 && $i <= $max_index_to_go ) {
+ $forced_breakpoint_to_go[$i] = 0;
+ $forced_breakpoint_count--;
- if ( $pos >= maximum_line_length($i_start) ) {
- return 0;
- }
+ $DEBUG_UNDOBP && do {
+ my ( $a, $b, $c ) = caller();
+ print STDOUT
+"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
+ };
}
- # ok, it's a one-line block
- create_one_line_block( $i_start, 20 );
- return 0;
- }
-
- # just keep going for other characters
- else {
+ # shouldn't happen, but not a critical error
+ else {
+ $DEBUG_UNDOBP && do {
+ my ( $a, $b, $c ) = caller();
+ print STDOUT
+"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
+ };
+ }
}
+ return;
}
+} ## end closure set_forced_breakpoint
- # Allow certain types of new one-line blocks to form by joining
- # input lines. These can be safely done, but for other block types,
- # we keep old one-line blocks but do not form new ones. It is not
- # always a good idea to make as many one-line blocks as possible,
- # so other types are not done. The user can always use -mangle.
- if ( $want_one_line_block{$block_type} ) {
- create_one_line_block( $i_start, 1 );
- }
- return 0;
-}
+{ ## begin closure set_closing_breakpoint
-sub unstore_token_to_go {
+ my %postponed_breakpoint;
- # remove most recent token from output stream
- my $self = shift;
- if ( $max_index_to_go > 0 ) {
- $max_index_to_go--;
+ sub initialize_postponed_breakpoint {
+ %postponed_breakpoint = ();
+ return;
}
- else {
- $max_index_to_go = UNDEFINED_INDEX;
+
+ sub has_postponed_breakpoint {
+ my ($seqno) = @_;
+ return $postponed_breakpoint{$seqno};
}
- return;
-}
-sub want_blank_line {
- my $self = shift;
- $self->flush();
- my $file_writer_object = $self->[_file_writer_object_];
- $file_writer_object->want_blank_line();
- return;
-}
+ sub set_closing_breakpoint {
-sub write_unindented_line {
- my ( $self, $line ) = @_;
- $self->flush();
- my $file_writer_object = $self->[_file_writer_object_];
- $file_writer_object->write_line($line);
- return;
-}
+ # set a breakpoint at a matching closing token
+ # at present, this is only used to break at a ':' which matches a '?'
+ my ( $self, $i_break ) = @_;
-sub undo_ci {
+ if ( $mate_index_to_go[$i_break] >= 0 ) {
- # Undo continuation indentation in certain sequences
- # For example, we can undo continuation indentation in sort/map/grep chains
- # my $dat1 = pack( "n*",
- # map { $_, $lookup->{$_} }
- # sort { $a <=> $b }
- # grep { $lookup->{$_} ne $default } keys %$lookup );
- # To align the map/sort/grep keywords like this:
- # my $dat1 = pack( "n*",
- # map { $_, $lookup->{$_} }
- # sort { $a <=> $b }
- # grep { $lookup->{$_} ne $default } keys %$lookup );
- my ( $self, $ri_first, $ri_last ) = @_;
- my ( $line_1, $line_2, $lev_last );
- my $this_line_is_semicolon_terminated;
- my $max_line = @{$ri_first} - 1;
+ # CAUTION: infinite recursion possible here:
+ # set_closing_breakpoint calls set_forced_breakpoint, and
+ # set_forced_breakpoint call set_closing_breakpoint
+ # ( test files attrib.t, BasicLyx.pm.html).
+ # Don't reduce the '2' in the statement below
+ if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
- # looking at each line of this batch..
- # We are looking at leading tokens and looking for a sequence
- # all at the same level and higher level than enclosing lines.
- foreach my $line ( 0 .. $max_line ) {
+ # break before } ] and ), but sub set_forced_breakpoint will decide
+ # to break before or after a ? and :
+ my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
+ $self->set_forced_breakpoint(
+ $mate_index_to_go[$i_break] - $inc );
+ }
+ }
+ else {
+ my $type_sequence = $type_sequence_to_go[$i_break];
+ if ($type_sequence) {
+ my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
+ $postponed_breakpoint{$type_sequence} = 1;
+ }
+ }
+ return;
+ }
+} ## end closure set_closing_breakpoint
- my $ibeg = $ri_first->[$line];
- my $lev = $levels_to_go[$ibeg];
- if ( $line > 0 ) {
+#########################################
+# CODE SECTION 9: Process batches of code
+#########################################
- # if we have started a chain..
- if ($line_1) {
+{ ## begin closure grind_batch_of_CODE
- # see if it continues..
- if ( $lev == $lev_last ) {
- if ( $types_to_go[$ibeg] eq 'k'
- && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
- {
+ # The routines in this closure begin the processing of a 'batch' of code.
- # chain continues...
- # check for chain ending at end of a statement
- if ( $line == $max_line ) {
+ # A variable to keep track of consecutive nonblank lines so that we can
+ # insert occasional blanks
+ my @nonblank_lines_at_depth;
- # see of this line ends a statement
- my $iend = $ri_last->[$line];
- $this_line_is_semicolon_terminated =
- $types_to_go[$iend] eq ';'
+ # A variable to remember maximum size of previous batches; this is needed
+ # by the logical padding routine
+ my $peak_batch_size;
+ my $batch_count;
- # with possible side comment
- || ( $types_to_go[$iend] eq '#'
- && $iend - $ibeg >= 2
- && $types_to_go[ $iend - 2 ] eq ';'
- && $types_to_go[ $iend - 1 ] eq 'b' );
- }
- $line_2 = $line if ($this_line_is_semicolon_terminated);
- }
- else {
+ sub initialize_grind_batch_of_CODE {
+ @nonblank_lines_at_depth = ();
+ $peak_batch_size = 0;
+ $batch_count = 0;
+ return;
+ }
- # kill chain
- $line_1 = undef;
- }
- }
- elsif ( $lev < $lev_last ) {
+ # sub grind_batch_of_CODE receives sections of code which are the longest
+ # possible lines without a break. In other words, it receives what is left
+ # after applying all breaks forced by blank lines, block comments, side
+ # comments, pod text, and structural braces. Its job is to break this code
+ # down into smaller pieces, if necessary, which fit within the maximum
+ # allowed line length. Then it sends the resulting lines of code on down
+ # the pipeline to the VerticalAligner package, breaking the code into
+ # continuation lines as necessary. The batch of tokens are in the "to_go"
+ # arrays. The name 'grind' is slightly suggestive of breaking down the
+ # long lines, but mainly it is easy to remember and find with an editor
+ # search.
- # chain ends with previous line
- $line_2 = $line - 1;
- }
- elsif ( $lev > $lev_last ) {
+ # The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work
+ # together in the following way:
- # kill chain
- $line_1 = undef;
- }
+ # - 'process_line_of_CODE' receives the original INPUT lines one-by-one and
+ # combines them into the largest sequences of tokens which might form a new
+ # line.
+ # - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT
+ # lines.
- # undo the continuation indentation if a chain ends
- if ( defined($line_2) && defined($line_1) ) {
- my $continuation_line_count = $line_2 - $line_1 + 1;
- @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ] =
- (0) x ($continuation_line_count)
- if ( $continuation_line_count >= 0 );
- @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
- = @reduced_spaces_to_go[ @{$ri_first}
- [ $line_1 .. $line_2 ] ];
- $line_1 = undef;
- }
- }
+ # So sub 'process_line_of_CODE' builds up the longest possible continouus
+ # sequences of tokens, regardless of line length, and then
+ # grind_batch_of_CODE breaks these sequences back down into the new output
+ # lines.
- # not in a chain yet..
- else {
+ # Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner.
- # look for start of a new sort/map/grep chain
- if ( $lev > $lev_last ) {
- if ( $types_to_go[$ibeg] eq 'k'
- && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
- {
- $line_1 = $line;
- }
- }
- }
- }
- $lev_last = $lev;
- }
- return;
-}
-
-sub undo_lp_ci {
+ my $DEBUG_GRIND;
- # If there is a single, long parameter within parens, like this:
- #
- # $self->command( "/msg "
- # . $infoline->chan
- # . " You said $1, but did you know that it's square was "
- # . $1 * $1 . " ?" );
- #
- # we can remove the continuation indentation of the 2nd and higher lines
- # to achieve this effect, which is more pleasing:
- #
- # $self->command("/msg "
- # . $infoline->chan
- # . " You said $1, but did you know that it's square was "
- # . $1 * $1 . " ?");
+ sub grind_batch_of_CODE {
- my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) =
- @_;
- my $max_line = @{$ri_first} - 1;
+ my ($self) = @_;
+ my $file_writer_object = $self->[_file_writer_object_];
- # must be multiple lines
- return unless $max_line > $line_open;
+ my $this_batch = $self->[_this_batch_];
+ $batch_count++;
- my $lev_start = $levels_to_go[$i_start];
- my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
+ my $comma_count_in_batch = $this_batch->[_comma_count_in_batch_];
+ my $starting_in_quote = $this_batch->[_starting_in_quote_];
+ my $ending_in_quote = $this_batch->[_ending_in_quote_];
+ my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
+ my $rK_to_go = $this_batch->[_rK_to_go_];
- # see if all additional lines in this container have continuation
- # indentation
- my $n;
- my $line_1 = 1 + $line_open;
- for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
- my $ibeg = $ri_first->[$n];
- my $iend = $ri_last->[$n];
- if ( $ibeg eq $closing_index ) { $n--; last }
- return if ( $lev_start != $levels_to_go[$ibeg] );
- return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
- last if ( $closing_index <= $iend );
- }
+ my $rLL = $self->[_rLL_];
- # we can reduce the indentation of all continuation lines
- my $continuation_line_count = $n - $line_open;
- @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
- (0) x ($continuation_line_count);
- @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
- @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
- return;
-}
+ my $rOpts_add_newlines = $rOpts->{'add-newlines'};
+ my $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
+ my $rOpts_maximum_fields_per_table =
+ $rOpts->{'maximum-fields-per-table'};
+ my $rOpts_one_line_block_semicolons =
+ $rOpts->{'one-line-block-semicolons'};
-sub pad_token {
+ # This routine is only called from sub flush_batch_of_code, so that
+ # routine is a better spot for debugging.
+ $DEBUG_GRIND && do {
+ my $token = my $type = "";
+ if ( $max_index_to_go >= 0 ) {
+ $token = $tokens_to_go[$max_index_to_go];
+ $type = $types_to_go[$max_index_to_go];
+ }
+ my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
+ print STDERR <<EOM;
+grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text:
+$output_str
+EOM
+ };
- # insert $pad_spaces before token number $ipad
- my ( $self, $ipad, $pad_spaces ) = @_;
- my $rLL = $self->[_rLL_];
- my $KK = $K_to_go[$ipad];
- my $tok = $rLL->[$KK]->[_TOKEN_];
- my $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_];
+ my $comma_arrow_count_contained =
+ $self->match_opening_and_closing_tokens();
- if ( $pad_spaces > 0 ) {
- $tok = ' ' x $pad_spaces . $tok;
- $tok_len += $pad_spaces;
- }
- elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
- $tok = "";
- $tok_len = 0;
- }
- else {
+ # tell the -lp option we are outputting a batch so it can close
+ # any unfinished items in its stack
+ finish_lp_batch();
- # shouldn't happen
- return;
- }
+ # If this line ends in a code block brace, set breaks at any
+ # previous closing code block braces to breakup a chain of code
+ # blocks on one line. This is very rare but can happen for
+ # user-defined subs. For example we might be looking at this:
+ # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
+ my $saw_good_break = 0; # flag to force breaks even if short line
+ if (
- $tok = $rLL->[$KK]->[_TOKEN_] = $tok;
- $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
+ # looking for opening or closing block brace
+ $block_type_to_go[$max_index_to_go]
- $token_lengths_to_go[$ipad] += $pad_spaces;
- $tokens_to_go[$ipad] = $tok;
+ # but not one of these which are never duplicated on a line:
+ # until|while|for|if|elsif|else
+ && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go]
+ }
+ )
+ {
+ my $lev = $nesting_depth_to_go[$max_index_to_go];
- foreach my $i ( $ipad .. $max_index_to_go ) {
- $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
- }
- return;
-}
+ # Walk backwards from the end and
+ # set break at any closing block braces at the same level.
+ # But quit if we are not in a chain of blocks.
+ for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
+ last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
+ next if ( $levels_to_go[$i] > $lev ); # skip past higher level
-{ ## begin closure set_logical_padding
- my %is_math_op;
+ if ( $block_type_to_go[$i] ) {
+ if ( $tokens_to_go[$i] eq '}' ) {
+ $self->set_forced_breakpoint($i);
+ $saw_good_break = 1;
+ }
+ }
- BEGIN {
+ # quit if we see anything besides words, function, blanks
+ # at this level
+ elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
+ }
+ }
- my @q = qw( + - * / );
- @is_math_op{@q} = (1) x scalar(@q);
- }
+ my $imin = 0;
+ my $imax = $max_index_to_go;
- sub set_logical_padding {
+ # trim any blank tokens
+ if ( $max_index_to_go >= 0 ) {
+ if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
+ if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
+ }
- # Look at a batch of lines and see if extra padding can improve the
- # alignment when there are certain leading operators. Here is an
- # example, in which some extra space is introduced before
- # '( $year' to make it line up with the subsequent lines:
- #
- # if ( ( $Year < 1601 )
- # || ( $Year > 2899 )
- # || ( $EndYear < 1601 )
- # || ( $EndYear > 2899 ) )
- # {
- # &Error_OutOfRange;
- # }
- #
- my ( $self, $ri_first, $ri_last, $peak_batch_size, $starting_in_quote )
- = @_;
- my $max_line = @{$ri_first} - 1;
+ # anything left to write?
+ if ( $imin <= $imax ) {
- my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
- $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
+ my $last_line_leading_type = $self->[_last_line_leading_type_];
+ my $last_line_leading_level = $self->[_last_line_leading_level_];
+ my $last_last_line_leading_level =
+ $self->[_last_last_line_leading_level_];
- # looking at each line of this batch..
- foreach my $line ( 0 .. $max_line - 1 ) {
+ # add a blank line before certain key types but not after a comment
+ if ( $last_line_leading_type !~ /^[#]/ ) {
+ my $want_blank = 0;
+ my $leading_token = $tokens_to_go[$imin];
+ my $leading_type = $types_to_go[$imin];
- # see if the next line begins with a logical operator
- $ibeg = $ri_first->[$line];
- $iend = $ri_last->[$line];
- $ibeg_next = $ri_first->[ $line + 1 ];
- $tok_next = $tokens_to_go[$ibeg_next];
- $type_next = $types_to_go[$ibeg_next];
+ # blank lines before subs except declarations and one-liners
+ if ( $leading_type eq 'i' && $leading_token =~ /$SUB_PATTERN/ )
+ {
+ $want_blank = $rOpts->{'blank-lines-before-subs'}
+ if (
+ $self->terminal_type_i( $imin, $imax ) !~ /^[\;\}]$/ );
+ }
- $has_leading_op_next = ( $tok_next =~ /^\w/ )
- ? $is_chain_operator{$tok_next} # + - * / : ? && ||
- : $is_chain_operator{$type_next}; # and, or
+ # break before all package declarations
+ elsif ($leading_token =~ /^(package\s)/
+ && $leading_type eq 'i' )
+ {
+ $want_blank = $rOpts->{'blank-lines-before-packages'};
+ }
- next unless ($has_leading_op_next);
+ # break before certain key blocks except one-liners
+ if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' )
+ {
+ $want_blank = $rOpts->{'blank-lines-before-subs'}
+ if ( $self->terminal_type_i( $imin, $imax ) ne '}' );
+ }
- # next line must not be at lesser depth
- next
- if ( $nesting_depth_to_go[$ibeg] >
- $nesting_depth_to_go[$ibeg_next] );
+ # Break before certain block types if we haven't had a
+ # break at this level for a while. This is the
+ # difficult decision..
+ elsif ($leading_type eq 'k'
+ && $last_line_leading_type ne 'b'
+ && $leading_token =~
+ /^(unless|if|while|until|for|foreach)$/ )
+ {
+ my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
+ if ( !defined($lc) ) { $lc = 0 }
- # identify the token in this line to be padded on the left
- $ipad = undef;
+ # patch for RT #128216: no blank line inserted at a level
+ # change
+ if ( $levels_to_go[$imin] != $last_line_leading_level ) {
+ $lc = 0;
+ }
- # handle lines at same depth...
- if ( $nesting_depth_to_go[$ibeg] ==
- $nesting_depth_to_go[$ibeg_next] )
- {
+ $want_blank =
+ $rOpts->{'blanks-before-blocks'}
+ && $lc >= $rOpts->{'long-block-line-count'}
+ && $self->consecutive_nonblank_lines() >=
+ $rOpts->{'long-block-line-count'}
+ && $self->terminal_type_i( $imin, $imax ) ne '}';
+ }
- # if this is not first line of the batch ...
- if ( $line > 0 ) {
-
- # and we have leading operator..
- next if $has_leading_op;
-
- # Introduce padding if..
- # 1. the previous line is at lesser depth, or
- # 2. the previous line ends in an assignment
- # 3. the previous line ends in a 'return'
- # 4. the previous line ends in a comma
- # Example 1: previous line at lesser depth
- # if ( ( $Year < 1601 ) # <- we are here but
- # || ( $Year > 2899 ) # list has not yet
- # || ( $EndYear < 1601 ) # collapsed vertically
- # || ( $EndYear > 2899 ) )
- # {
- #
- # Example 2: previous line ending in assignment:
- # $leapyear =
- # $year % 4 ? 0 # <- We are here
- # : $year % 100 ? 1
- # : $year % 400 ? 0
- # : 1;
- #
- # Example 3: previous line ending in comma:
- # push @expr,
- # /test/ ? undef
- # : eval($_) ? 1
- # : eval($_) ? 1
- # : 0;
-
- # be sure levels agree (do not indent after an indented 'if')
- next
- if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
-
- # allow padding on first line after a comma but only if:
- # (1) this is line 2 and
- # (2) there are at more than three lines and
- # (3) lines 3 and 4 have the same leading operator
- # These rules try to prevent padding within a long
- # comma-separated list.
- my $ok_comma;
- if ( $types_to_go[$iendm] eq ','
- && $line == 1
- && $max_line > 2 )
+ # Check for blank lines wanted before a closing brace
+ if ( $leading_token eq '}' ) {
+ if ( $rOpts->{'blank-lines-before-closing-block'}
+ && $block_type_to_go[$imin]
+ && $block_type_to_go[$imin] =~
+ /$blank_lines_before_closing_block_pattern/ )
{
- my $ibeg_next_next = $ri_first->[ $line + 2 ];
- my $tok_next_next = $tokens_to_go[$ibeg_next_next];
- $ok_comma = $tok_next_next eq $tok_next;
+ my $nblanks =
+ $rOpts->{'blank-lines-before-closing-block'};
+ if ( $nblanks > $want_blank ) {
+ $want_blank = $nblanks;
+ }
}
+ }
- next
- unless (
- $is_assignment{ $types_to_go[$iendm] }
- || $ok_comma
- || ( $nesting_depth_to_go[$ibegm] <
- $nesting_depth_to_go[$ibeg] )
- || ( $types_to_go[$iendm] eq 'k'
- && $tokens_to_go[$iendm] eq 'return' )
- );
+ if ($want_blank) {
- # we will add padding before the first token
- $ipad = $ibeg;
+ # future: send blank line down normal path to VerticalAligner
+ $self->flush_vertical_aligner();
+ $file_writer_object->require_blank_code_lines($want_blank);
}
+ }
- # for first line of the batch..
- else {
+ # update blank line variables and count number of consecutive
+ # non-blank, non-comment lines at this level
+ $last_last_line_leading_level = $last_line_leading_level;
+ $last_line_leading_level = $levels_to_go[$imin];
+ if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
+ $last_line_leading_type = $types_to_go[$imin];
+ if ( $last_line_leading_level == $last_last_line_leading_level
+ && $last_line_leading_type ne 'b'
+ && $last_line_leading_type ne '#'
+ && defined( $nonblank_lines_at_depth[$last_line_leading_level] )
+ )
+ {
+ $nonblank_lines_at_depth[$last_line_leading_level]++;
+ }
+ else {
+ $nonblank_lines_at_depth[$last_line_leading_level] = 1;
+ }
- # WARNING: Never indent if first line is starting in a
- # continued quote, which would change the quote.
- next if $starting_in_quote;
+ $self->[_last_line_leading_type_] = $last_line_leading_type;
+ $self->[_last_line_leading_level_] = $last_line_leading_level;
+ $self->[_last_last_line_leading_level_] =
+ $last_last_line_leading_level;
- # if this is text after closing '}'
- # then look for an interior token to pad
- if ( $types_to_go[$ibeg] eq '}' ) {
+ # add a couple of extra terminal blank tokens
+ $self->pad_array_to_go();
- }
+ # set all forced breakpoints for good list formatting
+ my $is_long_line =
+ $self->excess_line_length( $imin, $max_index_to_go ) > 0;
- # otherwise, we might pad if it looks really good
- else {
+ my $old_line_count_in_batch =
+ $self->get_old_line_count( $K_to_go[0],
+ $K_to_go[$max_index_to_go] );
- # we might pad token $ibeg, so be sure that it
- # is at the same depth as the next line.
- next
- if ( $nesting_depth_to_go[$ibeg] !=
- $nesting_depth_to_go[$ibeg_next] );
+ if (
+ $is_long_line
+ || $old_line_count_in_batch > 1
- # We can pad on line 1 of a statement if at least 3
- # lines will be aligned. Otherwise, it
- # can look very confusing.
+ # must always call scan_list() with unbalanced batches because it
+ # is maintaining some stacks
+ || is_unbalanced_batch()
- # We have to be careful not to pad if there are too few
- # lines. The current rule is:
- # (1) in general we require at least 3 consecutive lines
- # with the same leading chain operator token,
- # (2) but an exception is that we only require two lines
- # with leading colons if there are no more lines. For example,
- # the first $i in the following snippet would get padding
- # by the second rule:
- #
- # $i == 1 ? ( "First", "Color" )
- # : $i == 2 ? ( "Then", "Rarity" )
- # : ( "Then", "Name" );
+ # call scan_list if we might want to break at commas
+ || (
+ $comma_count_in_batch
+ && ( $rOpts_maximum_fields_per_table > 0
+ || $rOpts_comma_arrow_breakpoints == 0 )
+ )
- if ( $max_line > 1 ) {
- my $leading_token = $tokens_to_go[$ibeg_next];
- my $tokens_differ;
+ # call scan_list if user may want to break open some one-line
+ # hash references
+ || ( $comma_arrow_count_contained
+ && $rOpts_comma_arrow_breakpoints != 3 )
+ )
+ {
+ ## This caused problems in one version of perl for unknown reasons:
+ ## $saw_good_break ||= scan_list();
+ my $sgb = $self->scan_list();
+ $saw_good_break ||= $sgb;
+ }
- # never indent line 1 of a '.' series because
- # previous line is most likely at same level.
- # TODO: we should also look at the leasing_spaces
- # of the last output line and skip if it is same
- # as this line.
- next if ( $leading_token eq '.' );
+ # let $ri_first and $ri_last be references to lists of
+ # first and last tokens of line fragments to output..
+ my ( $ri_first, $ri_last );
- my $count = 1;
- foreach my $l ( 2 .. 3 ) {
- last if ( $line + $l > $max_line );
- my $ibeg_next_next = $ri_first->[ $line + $l ];
- if ( $tokens_to_go[$ibeg_next_next] ne
- $leading_token )
- {
- $tokens_differ = 1;
- last;
- }
- $count++;
- }
- next if ($tokens_differ);
- next if ( $count < 3 && $leading_token ne ':' );
- $ipad = $ibeg;
- }
- else {
- next;
- }
- }
- }
+ # write a single line if..
+ if (
+
+ # we aren't allowed to add any newlines
+ !$rOpts_add_newlines
+
+ # or, we don't already have an interior breakpoint
+ # and we didn't see a good breakpoint
+ || (
+ !get_forced_breakpoint_count()
+ && !$saw_good_break
+
+ # and this line is 'short'
+ && !$is_long_line
+ )
+ )
+ {
+ @{$ri_first} = ($imin);
+ @{$ri_last} = ($imax);
}
- # find interior token to pad if necessary
- if ( !defined($ipad) ) {
+ # otherwise use multiple lines
+ else {
- for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
+ ( $ri_first, $ri_last, my $colon_count ) =
+ $self->set_continuation_breaks($saw_good_break);
- # find any unclosed container
- next
- unless ( $type_sequence_to_go[$i]
- && $self->mate_index_to_go($i) > $iend );
+ $self->break_all_chain_tokens( $ri_first, $ri_last );
- # find next nonblank token to pad
- $ipad = $inext_to_go[$i];
- last if ( $ipad > $iend );
- }
- last unless $ipad;
- }
+ $self->break_equals( $ri_first, $ri_last );
- # We cannot pad the first leading token of a file because
- # it could cause a bug in which the starting indentation
- # level is guessed incorrectly each time the code is run
- # though perltidy, thus causing the code to march off to
- # the right. For example, the following snippet would have
- # this problem:
+ # now we do a correction step to clean this up a bit
+ # (The only time we would not do this is for debugging)
+ if ( $rOpts->{'recombine'} ) {
+ ( $ri_first, $ri_last ) =
+ $self->recombine_breakpoints( $ri_first, $ri_last );
+ }
-## ov_method mycan( $package, '(""' ), $package
-## or ov_method mycan( $package, '(0+' ), $package
-## or ov_method mycan( $package, '(bool' ), $package
-## or ov_method mycan( $package, '(nomethod' ), $package;
+ $self->insert_final_ternary_breaks( $ri_first, $ri_last )
+ if $colon_count;
+ }
- # If this snippet is within a block this won't happen
- # unless the user just processes the snippet alone within
- # an editor. In that case either the user will see and
- # fix the problem or it will be corrected next time the
- # entire file is processed with perltidy.
- next if ( $ipad == 0 && $peak_batch_size <= 1 );
+ $self->insert_breaks_before_list_opening_containers( $ri_first,
+ $ri_last );
-## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
-## IT DID MORE HARM THAN GOOD
-## ceil(
-## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
-## / $upem
-## ),
-##? # do not put leading padding for just 2 lines of math
-##? if ( $ipad == $ibeg
-##? && $line > 0
-##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
-##? && $is_math_op{$type_next}
-##? && $line + 2 <= $max_line )
-##? {
-##? my $ibeg_next_next = $ri_first->[ $line + 2 ];
-##? my $type_next_next = $types_to_go[$ibeg_next_next];
-##? next if !$is_math_op{$type_next_next};
-##? }
+ # do corrector step if -lp option is used
+ my $do_not_pad = 0;
+ if ($rOpts_line_up_parentheses) {
+ $do_not_pad =
+ $self->correct_lp_indentation( $ri_first, $ri_last );
+ }
+ $self->unmask_phantom_semicolons( $ri_first, $ri_last );
+ if ( $rOpts_one_line_block_semicolons == 0 ) {
+ $self->delete_one_line_semicolons( $ri_first, $ri_last );
+ }
- # next line must not be at greater depth
- my $iend_next = $ri_last->[ $line + 1 ];
- next
- if ( $nesting_depth_to_go[ $iend_next + 1 ] >
- $nesting_depth_to_go[$ipad] );
+ # The line breaks for this batch of code have been finalized. Now we
+ # can to package the results for further processing. We will switch
+ # from the local '_to_go' buffer arrays (i-index) back to the global
+ # token arrays (K-index) at this point.
+ my $rlines_K;
+ my $index_error;
+ for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
+ my $ibeg = $ri_first->[$n];
+ my $Kbeg = $K_to_go[$ibeg];
+ my $iend = $ri_last->[$n];
+ my $Kend = $K_to_go[$iend];
+ if ( $iend - $ibeg != $Kend - $Kbeg ) {
+ $index_error = $n unless defined($index_error);
+ }
+ push @{$rlines_K},
+ [ $Kbeg, $Kend, $forced_breakpoint_to_go[$iend] ];
+ }
- # lines must be somewhat similar to be padded..
- my $inext_next = $inext_to_go[$ibeg_next];
- my $type = $types_to_go[$ipad];
- my $type_next = $types_to_go[ $ipad + 1 ];
+ # Check correctness of the mapping between the i and K token indexes
+ if ( defined($index_error) ) {
- # see if there are multiple continuation lines
- my $logical_continuation_lines = 1;
- if ( $line + 2 <= $max_line ) {
- my $leading_token = $tokens_to_go[$ibeg_next];
- my $ibeg_next_next = $ri_first->[ $line + 2 ];
- if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
- && $nesting_depth_to_go[$ibeg_next] eq
- $nesting_depth_to_go[$ibeg_next_next] )
- {
- $logical_continuation_lines++;
+ # Temporary debug code - should never get here
+ for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
+ my $ibeg = $ri_first->[$n];
+ my $Kbeg = $K_to_go[$ibeg];
+ my $iend = $ri_last->[$n];
+ my $Kend = $K_to_go[$iend];
+ my $idiff = $iend - $ibeg;
+ my $Kdiff = $Kend - $Kbeg;
+ print STDERR <<EOM;
+line $n, irange $ibeg-$iend = $idiff, Krange $Kbeg-$Kend = $Kdiff;
+EOM
}
+ Fault(
+ "Index error at line $index_error; i and K ranges differ");
}
- # see if leading types match
- my $types_match = $types_to_go[$inext_next] eq $type;
- my $matches_without_bang;
+ $this_batch->[_rlines_K_] = $rlines_K;
+ $this_batch->[_ibeg0_] = $ri_first->[0];
+ $this_batch->[_peak_batch_size_] = $peak_batch_size;
+ $this_batch->[_do_not_pad_] = $do_not_pad;
+ $this_batch->[_batch_count_] = $batch_count;
- # if first line has leading ! then compare the following token
- if ( !$types_match && $type eq '!' ) {
- $types_match = $matches_without_bang =
- $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
+ $self->send_lines_to_vertical_aligner();
+
+ # Insert any requested blank lines after an opening brace. We have
+ # to skip back before any side comment to find the terminal token
+ my $iterm;
+ for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) {
+ next if $types_to_go[$iterm] eq '#';
+ next if $types_to_go[$iterm] eq 'b';
+ last;
}
- if (
+ # write requested number of blank lines after an opening block brace
+ if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) {
+ if ( $rOpts->{'blank-lines-after-opening-block'}
+ && $block_type_to_go[$iterm]
+ && $block_type_to_go[$iterm] =~
+ /$blank_lines_after_opening_block_pattern/ )
+ {
+ my $nblanks = $rOpts->{'blank-lines-after-opening-block'};
+ $self->flush_vertical_aligner();
+ $file_writer_object->require_blank_code_lines($nblanks);
+ }
+ }
+ }
- # either we have multiple continuation lines to follow
- # and we are not padding the first token
- ( $logical_continuation_lines > 1 && $ipad > 0 )
+ # Remember the largest batch size processed. This is needed by the
+ # logical padding routine to avoid padding the first nonblank token
+ if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
+ $peak_batch_size = $max_index_to_go;
+ }
- # or..
- || (
+ return;
+ }
+} ## end closure grind_batch_of_CODE
- # types must match
- $types_match
+{ ## begin closure match_opening_and_closing_tokens
- # and keywords must match if keyword
- && !(
- $type eq 'k'
- && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
- )
- )
- )
- {
+ # closure to keep track of unbalanced containers.
+ # arrays shared by the routines in this block:
+ my %saved_opening_indentation;
+ my @unmatched_opening_indexes_in_this_batch;
+ my @unmatched_closing_indexes_in_this_batch;
+ my %comma_arrow_count;
- #----------------------begin special checks--------------
- #
- # SPECIAL CHECK 1:
- # A check is needed before we can make the pad.
- # If we are in a list with some long items, we want each
- # item to stand out. So in the following example, the
- # first line beginning with '$casefold->' would look good
- # padded to align with the next line, but then it
- # would be indented more than the last line, so we
- # won't do it.
- #
- # ok(
- # $casefold->{code} eq '0041'
- # && $casefold->{status} eq 'C'
- # && $casefold->{mapping} eq '0061',
- # 'casefold 0x41'
- # );
- #
- # Note:
- # It would be faster, and almost as good, to use a comma
- # count, and not pad if comma_count > 1 and the previous
- # line did not end with a comma.
- #
- my $ok_to_pad = 1;
+ sub initialize_saved_opening_indentation {
+ %saved_opening_indentation = ();
+ return;
+ }
- my $ibg = $ri_first->[ $line + 1 ];
- my $depth = $nesting_depth_to_go[ $ibg + 1 ];
+ sub is_unbalanced_batch {
+ return @unmatched_opening_indexes_in_this_batch +
+ @unmatched_closing_indexes_in_this_batch;
+ }
- # just use simplified formula for leading spaces to avoid
- # needless sub calls
- my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
+ sub comma_arrow_count {
+ my $seqno = shift;
+ return $comma_arrow_count{$seqno};
+ }
- # look at each line beyond the next ..
- my $l = $line + 1;
- foreach my $ltest ( $line + 2 .. $max_line ) {
- $l = $ltest;
- my $ibg = $ri_first->[$l];
+ sub match_opening_and_closing_tokens {
- # quit looking at the end of this container
- last
- if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
- || ( $nesting_depth_to_go[$ibg] < $depth );
+ # Match up indexes of opening and closing braces, etc, in this batch.
+ # This has to be done after all tokens are stored because unstoring
+ # of tokens would otherwise cause trouble.
- # cannot do the pad if a later line would be
- # outdented more
- if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
- $ok_to_pad = 0;
- last;
- }
+ my ($self) = @_;
+
+ @unmatched_opening_indexes_in_this_batch = ();
+ @unmatched_closing_indexes_in_this_batch = ();
+ %comma_arrow_count = ();
+ my $comma_arrow_count_contained = 0;
+
+ foreach my $i ( 0 .. $max_index_to_go ) {
+ if ( $type_sequence_to_go[$i] ) {
+ my $token = $tokens_to_go[$i];
+ if ( $token =~ /^[\(\[\{\?]$/ ) {
+ push @unmatched_opening_indexes_in_this_batch, $i;
}
+ elsif ( $token =~ /^[\)\]\}\:]$/ ) {
- # don't pad if we end in a broken list
- if ( $l == $max_line ) {
- my $i2 = $ri_last->[$l];
- if ( $types_to_go[$i2] eq '#' ) {
- my $i1 = $ri_first->[$l];
- next if $self->terminal_type_i( $i1, $i2 ) eq ',';
+ my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
+ if ( defined($i_mate) && $i_mate >= 0 ) {
+ if ( $type_sequence_to_go[$i_mate] ==
+ $type_sequence_to_go[$i] )
+ {
+ $mate_index_to_go[$i] = $i_mate;
+ $mate_index_to_go[$i_mate] = $i;
+ my $seqno = $type_sequence_to_go[$i];
+ if ( $comma_arrow_count{$seqno} ) {
+ $comma_arrow_count_contained +=
+ $comma_arrow_count{$seqno};
+ }
+ }
+ else {
+ push @unmatched_opening_indexes_in_this_batch,
+ $i_mate;
+ push @unmatched_closing_indexes_in_this_batch, $i;
+ }
}
+ else {
+ push @unmatched_closing_indexes_in_this_batch, $i;
+ }
+ }
+ }
+ elsif ( $tokens_to_go[$i] eq '=>' ) {
+ if (@unmatched_opening_indexes_in_this_batch) {
+ my $j = $unmatched_opening_indexes_in_this_batch[-1];
+ my $seqno = $type_sequence_to_go[$j];
+ $comma_arrow_count{$seqno}++;
}
+ }
+ }
+ return $comma_arrow_count_contained;
+ }
- # SPECIAL CHECK 2:
- # a minus may introduce a quoted variable, and we will
- # add the pad only if this line begins with a bare word,
- # such as for the word 'Button' here:
- # [
- # Button => "Print letter \"~$_\"",
- # -command => [ sub { print "$_[0]\n" }, $_ ],
- # -accelerator => "Meta+$_"
- # ];
- #
- # On the other hand, if 'Button' is quoted, it looks best
- # not to pad:
- # [
- # 'Button' => "Print letter \"~$_\"",
- # -command => [ sub { print "$_[0]\n" }, $_ ],
- # -accelerator => "Meta+$_"
- # ];
- if ( $types_to_go[$ibeg_next] eq 'm' ) {
- $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
- }
-
- next unless $ok_to_pad;
-
- #----------------------end special check---------------
-
- my $length_1 = total_line_length( $ibeg, $ipad - 1 );
- my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
- $pad_spaces = $length_2 - $length_1;
+ sub save_opening_indentation {
- # If the first line has a leading ! and the second does
- # not, then remove one space to try to align the next
- # leading characters, which are often the same. For example:
- # if ( !$ts
- # || $ts == $self->Holder
- # || $self->Holder->Type eq "Arena" )
- #
- # This usually helps readability, but if there are subsequent
- # ! operators things will still get messed up. For example:
- #
- # if ( !exists $Net::DNS::typesbyname{$qtype}
- # && exists $Net::DNS::classesbyname{$qtype}
- # && !exists $Net::DNS::classesbyname{$qclass}
- # && exists $Net::DNS::typesbyname{$qclass} )
- # We can't fix that.
- if ($matches_without_bang) { $pad_spaces-- }
+ # This should be called after each batch of tokens is output. It
+ # saves indentations of lines of all unmatched opening tokens.
+ # These will be used by sub get_opening_indentation.
- # make sure this won't change if -lp is used
- my $indentation_1 = $leading_spaces_to_go[$ibeg];
- if ( ref($indentation_1) ) {
- if ( $indentation_1->get_recoverable_spaces() == 0 ) {
- my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
- unless ( $indentation_2->get_recoverable_spaces() == 0 )
- {
- $pad_spaces = 0;
- }
- }
- }
+ my ( $self, $ri_first, $ri_last, $rindentation_list ) = @_;
- # we might be able to handle a pad of -1 by removing a blank
- # token
- if ( $pad_spaces < 0 ) {
+ # we no longer need indentations of any saved indentations which
+ # are unmatched closing tokens in this batch, because we will
+ # never encounter them again. So we can delete them to keep
+ # the hash size down.
+ foreach (@unmatched_closing_indexes_in_this_batch) {
+ my $seqno = $type_sequence_to_go[$_];
+ delete $saved_opening_indentation{$seqno};
+ }
- # Deactivated for -kpit due to conflict. This block deletes
- # a space in an attempt to improve alignment in some cases,
- # but it may conflict with user spacing requests. For now
- # it is just deactivated if the -kpit option is used.
- if ( $pad_spaces == -1 ) {
- if ( $ipad > $ibeg
- && $types_to_go[ $ipad - 1 ] eq 'b'
- && !%keyword_paren_inner_tightness )
- {
- $self->pad_token( $ipad - 1, $pad_spaces );
- }
- }
- $pad_spaces = 0;
- }
+ # we need to save indentations of any unmatched opening tokens
+ # in this batch because we may need them in a subsequent batch.
+ foreach (@unmatched_opening_indexes_in_this_batch) {
+ my $seqno = $type_sequence_to_go[$_];
+ $saved_opening_indentation{$seqno} = [
+ lookup_opening_indentation(
+ $_, $ri_first, $ri_last, $rindentation_list
+ )
+ ];
+ }
+ return;
+ }
- # now apply any padding for alignment
- if ( $ipad >= 0 && $pad_spaces ) {
+ sub get_saved_opening_indentation {
+ my ($seqno) = @_;
+ my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 );
- my $length_t = total_line_length( $ibeg, $iend );
- if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) )
- {
- $self->pad_token( $ipad, $pad_spaces );
- }
- }
+ if ($seqno) {
+ if ( $saved_opening_indentation{$seqno} ) {
+ ( $indent, $offset, $is_leading ) =
+ @{ $saved_opening_indentation{$seqno} };
+ $exists = 1;
}
}
- continue {
- $iendm = $iend;
- $ibegm = $ibeg;
- $has_leading_op = $has_leading_op_next;
- } # end of loop over lines
- return;
- }
-} ## end closure set_logical_padding
-sub correct_lp_indentation {
+ # some kind of serious error it doesn't exist
+ # (example is badfile.t)
- # When the -lp option is used, we need to make a last pass through
- # each line to correct the indentation positions in case they differ
- # from the predictions. This is necessary because perltidy uses a
- # predictor/corrector method for aligning with opening parens. The
- # predictor is usually good, but sometimes stumbles. The corrector
- # tries to patch things up once the actual opening paren locations
- # are known.
- my ( $self, $ri_first, $ri_last ) = @_;
- my $do_not_pad = 0;
+ return ( $indent, $offset, $is_leading, $exists );
+ }
+} ## end closure match_opening_and_closing_tokens
- # Note on flag '$do_not_pad':
- # We want to avoid a situation like this, where the aligner inserts
- # whitespace before the '=' to align it with a previous '=', because
- # otherwise the parens might become mis-aligned in a situation like
- # this, where the '=' has become aligned with the previous line,
- # pushing the opening '(' forward beyond where we want it.
+sub lookup_opening_indentation {
+
+ # get the indentation of the line in the current output batch
+ # which output a selected opening token
#
- # $mkFloor::currentRoom = '';
- # $mkFloor::c_entry = $c->Entry(
- # -width => '10',
- # -relief => 'sunken',
- # ...
- # );
+ # given:
+ # $i_opening - index of an opening token in the current output batch
+ # whose line indentation we need
+ # $ri_first - reference to list of the first index $i for each output
+ # line in this batch
+ # $ri_last - reference to list of the last index $i for each output line
+ # in this batch
+ # $rindentation_list - reference to a list containing the indentation
+ # used for each line. (NOTE: the first slot in
+ # this list is the last returned line number, and this is
+ # followed by the list of indentations).
#
- # We leave it to the aligner to decide how to do this.
-
- # first remove continuation indentation if appropriate
- my $max_line = @{$ri_first} - 1;
+ # return
+ # -the indentation of the line which contained token $i_opening
+ # -and its offset (number of columns) from the start of the line
- # looking at each line of this batch..
- my ( $ibeg, $iend );
- foreach my $line ( 0 .. $max_line ) {
- $ibeg = $ri_first->[$line];
- $iend = $ri_last->[$line];
+ my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
- # looking at each token in this output line..
- foreach my $i ( $ibeg .. $iend ) {
+ if ( !@{$ri_last} ) {
+ warning("Error in opening_indentation: no lines");
+ return;
+ }
- # How many space characters to place before this token
- # for special alignment. Actual padding is done in the
- # continue block.
+ my $nline = $rindentation_list->[0]; # line number of previous lookup
- # looking for next unvisited indentation item
- my $indentation = $leading_spaces_to_go[$i];
- if ( !$indentation->get_marked() ) {
- $indentation->set_marked(1);
+ # reset line location if necessary
+ $nline = 0 if ( $i_opening < $ri_start->[$nline] );
- # looking for indentation item for which we are aligning
- # with parens, braces, and brackets
- next unless ( $indentation->get_align_paren() );
+ # find the correct line
+ unless ( $i_opening > $ri_last->[-1] ) {
+ while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
+ }
- # skip closed container on this line
- if ( $i > $ibeg ) {
- my $im = max( $ibeg, $iprev_to_go[$i] );
- if ( $type_sequence_to_go[$im]
- && $mate_index_to_go[$im] <= $iend )
- {
- next;
- }
- }
+ # error - token index is out of bounds - shouldn't happen
+ else {
+ warning(
+"non-fatal program bug in lookup_opening_indentation - index out of range\n"
+ );
+ report_definite_bug();
+ $nline = $#{$ri_last};
+ }
- if ( $line == 1 && $i == $ibeg ) {
- $do_not_pad = 1;
- }
+ $rindentation_list->[0] =
+ $nline; # save line number to start looking next call
+ my $ibeg = $ri_start->[$nline];
+ my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
+ my $is_leading = ( $ibeg == $i_opening );
+ return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
+}
- # Ok, let's see what the error is and try to fix it
- my $actual_pos;
- my $predicted_pos = $indentation->get_spaces();
- if ( $i > $ibeg ) {
+{ ## begin closure terminal_type_i
- # token is mid-line - use length to previous token
- $actual_pos = total_line_length( $ibeg, $i - 1 );
+ my %is_sort_map_grep_eval_do;
- # for mid-line token, we must check to see if all
- # additional lines have continuation indentation,
- # and remove it if so. Otherwise, we do not get
- # good alignment.
- my $closing_index = $indentation->get_closed();
- if ( $closing_index > $iend ) {
- my $ibeg_next = $ri_first->[ $line + 1 ];
- if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
- $self->undo_lp_ci( $line, $i, $closing_index,
- $ri_first, $ri_last );
- }
- }
- }
- elsif ( $line > 0 ) {
+ BEGIN {
+ my @q = qw(sort map grep eval do);
+ @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
+ }
- # handle case where token starts a new line;
- # use length of previous line
- my $ibegm = $ri_first->[ $line - 1 ];
- my $iendm = $ri_last->[ $line - 1 ];
- $actual_pos = total_line_length( $ibegm, $iendm );
+ sub terminal_type_i {
- # follow -pt style
- ++$actual_pos
- if ( $types_to_go[ $iendm + 1 ] eq 'b' );
- }
- else {
-
- # token is first character of first line of batch
- $actual_pos = $predicted_pos;
- }
-
- my $move_right = $actual_pos - $predicted_pos;
+ # returns type of last token on this line (terminal token), as follows:
+ # returns # for a full-line comment
+ # returns ' ' for a blank line
+ # otherwise returns final token type
- # done if no error to correct (gnu2.t)
- if ( $move_right == 0 ) {
- $indentation->set_recoverable_spaces($move_right);
- next;
- }
+ my ( $self, $ibeg, $iend ) = @_;
- # if we have not seen closure for this indentation in
- # this batch, we can only pass on a request to the
- # vertical aligner
- my $closing_index = $indentation->get_closed();
+ # Start at the end and work backwards
+ my $i = $iend;
+ my $type_i = $types_to_go[$i];
- if ( $closing_index < 0 ) {
- $indentation->set_recoverable_spaces($move_right);
- next;
- }
+ # Check for side comment
+ if ( $type_i eq '#' ) {
+ $i--;
+ if ( $i < $ibeg ) {
+ return wantarray ? ( $type_i, $ibeg ) : $type_i;
+ }
+ $type_i = $types_to_go[$i];
+ }
- # If necessary, look ahead to see if there is really any
- # leading whitespace dependent on this whitespace, and
- # also find the longest line using this whitespace.
- # Since it is always safe to move left if there are no
- # dependents, we only need to do this if we may have
- # dependent nodes or need to move right.
+ # Skip past a blank
+ if ( $type_i eq 'b' ) {
+ $i--;
+ if ( $i < $ibeg ) {
+ return wantarray ? ( $type_i, $ibeg ) : $type_i;
+ }
+ $type_i = $types_to_go[$i];
+ }
- my $right_margin = 0;
- my $have_child = $indentation->get_have_child();
+ # Found it..make sure it is a BLOCK termination,
+ # but hide a terminal } after sort/grep/map because it is not
+ # necessarily the end of the line. (terminal.t)
+ my $block_type = $block_type_to_go[$i];
+ if (
+ $type_i eq '}'
+ && ( !$block_type
+ || ( $is_sort_map_grep_eval_do{$block_type} ) )
+ )
+ {
+ $type_i = 'b';
+ }
+ return wantarray ? ( $type_i, $i ) : $type_i;
+ }
- my %saw_indentation;
- my $line_count = 1;
- $saw_indentation{$indentation} = $indentation;
+} ## end closure terminal_type_i
- if ( $have_child || $move_right > 0 ) {
- $have_child = 0;
- my $max_length = 0;
- if ( $i == $ibeg ) {
- $max_length = total_line_length( $ibeg, $iend );
- }
+sub pad_array_to_go {
- # look ahead at the rest of the lines of this batch..
- foreach my $line_t ( $line + 1 .. $max_line ) {
- my $ibeg_t = $ri_first->[$line_t];
- my $iend_t = $ri_last->[$line_t];
- last if ( $closing_index <= $ibeg_t );
+ # To simplify coding in scan_list and set_bond_strengths, it helps
+ # to create some extra blank tokens at the end of the arrays
+ # FIXME: it would be nice to eliminate the need for this routine.
+ my ($self) = @_;
+ $tokens_to_go[ $max_index_to_go + 1 ] = '';
+ $tokens_to_go[ $max_index_to_go + 2 ] = '';
+ $types_to_go[ $max_index_to_go + 1 ] = 'b';
+ $types_to_go[ $max_index_to_go + 2 ] = 'b';
+ $nesting_depth_to_go[ $max_index_to_go + 1 ] =
+ $nesting_depth_to_go[$max_index_to_go];
- # remember all different indentation objects
- my $indentation_t = $leading_spaces_to_go[$ibeg_t];
- $saw_indentation{$indentation_t} = $indentation_t;
- $line_count++;
+ # /^[R\}\)\]]$/
+ if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
+ if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
- # remember longest line in the group
- my $length_t = total_line_length( $ibeg_t, $iend_t );
- if ( $length_t > $max_length ) {
- $max_length = $length_t;
- }
- }
- $right_margin = maximum_line_length($ibeg) - $max_length;
- if ( $right_margin < 0 ) { $right_margin = 0 }
- }
+ # shouldn't happen:
+ unless ( get_saw_brace_error() ) {
+ warning(
+"Program bug in pad_array_to_go: hit nesting error which should have been caught\n"
+ );
+ report_definite_bug();
+ }
+ }
+ else {
+ $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
+ }
+ }
- my $first_line_comma_count =
- grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
- my $comma_count = $indentation->get_comma_count();
- my $arrow_count = $indentation->get_arrow_count();
+ # /^[L\{\(\[]$/
+ elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
+ $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
+ }
+ return;
+}
- # This is a simple approximate test for vertical alignment:
- # if we broke just after an opening paren, brace, bracket,
- # and there are 2 or more commas in the first line,
- # and there are no '=>'s,
- # then we are probably vertically aligned. We could set
- # an exact flag in sub scan_list, but this is good
- # enough.
- my $indentation_count = keys %saw_indentation;
- my $is_vertically_aligned =
- ( $i == $ibeg
- && $first_line_comma_count > 1
- && $indentation_count == 1
- && ( $arrow_count == 0 || $arrow_count == $line_count ) );
+sub break_all_chain_tokens {
- # Make the move if possible ..
- if (
+ # scan the current breakpoints looking for breaks at certain "chain
+ # operators" (. : && || + etc) which often occur repeatedly in a long
+ # statement. If we see a break at any one, break at all similar tokens
+ # within the same container.
+ #
+ my ( $self, $ri_left, $ri_right ) = @_;
- # we can always move left
- $move_right < 0
+ my %saw_chain_type;
+ my %left_chain_type;
+ my %right_chain_type;
+ my %interior_chain_type;
+ my $nmax = @{$ri_right} - 1;
- # but we should only move right if we are sure it will
- # not spoil vertical alignment
- || ( $comma_count == 0 )
- || ( $comma_count > 0 && !$is_vertically_aligned )
- )
- {
- my $move =
- ( $move_right <= $right_margin )
- ? $move_right
- : $right_margin;
+ # scan the left and right end tokens of all lines
+ my $count = 0;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ my $typel = $types_to_go[$il];
+ my $typer = $types_to_go[$ir];
+ $typel = '+' if ( $typel eq '-' ); # treat + and - the same
+ $typer = '+' if ( $typer eq '-' );
+ $typel = '*' if ( $typel eq '/' ); # treat * and / the same
+ $typer = '*' if ( $typer eq '/' );
+ my $tokenl = $tokens_to_go[$il];
+ my $tokenr = $tokens_to_go[$ir];
- foreach ( keys %saw_indentation ) {
- $saw_indentation{$_}
- ->permanently_decrease_available_spaces( -$move );
- }
- }
+ if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
+ next if ( $typel eq '?' );
+ push @{ $left_chain_type{$typel} }, $il;
+ $saw_chain_type{$typel} = 1;
+ $count++;
+ }
+ if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
+ next if ( $typer eq '?' );
+ push @{ $right_chain_type{$typer} }, $ir;
+ $saw_chain_type{$typer} = 1;
+ $count++;
+ }
+ }
+ return unless $count;
- # Otherwise, record what we want and the vertical aligner
- # will try to recover it.
- else {
- $indentation->set_recoverable_spaces($move_right);
- }
+ # now look for any interior tokens of the same types
+ $count = 0;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ foreach my $i ( $il + 1 .. $ir - 1 ) {
+ my $type = $types_to_go[$i];
+ $type = '+' if ( $type eq '-' );
+ $type = '*' if ( $type eq '/' );
+ if ( $saw_chain_type{$type} ) {
+ push @{ $interior_chain_type{$type} }, $i;
+ $count++;
}
}
}
- return $do_not_pad;
-}
+ return unless $count;
-{ ## begin closure accumulate_csc_text
+ # now make a list of all new break points
+ my @insert_list;
-# These routines are called once per batch when the --closing-side-comments flag
-# has been set.
+ # loop over all chain types
+ foreach my $type ( keys %saw_chain_type ) {
- my %block_leading_text;
- my %block_opening_line_number;
- my $csc_new_statement_ok;
- my $csc_last_label;
- my %csc_block_label;
- my $accumulating_text_for_block;
- my $leading_block_text;
- my $rleading_block_if_elsif_text;
- my $leading_block_text_level;
- my $leading_block_text_length_exceeded;
- my $leading_block_text_line_length;
- my $leading_block_text_line_number;
+ # quit if just ONE continuation line with leading . For example--
+ # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
+ # . $contents;
+ last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
- sub initialize_csc_vars {
- %block_leading_text = ();
- %block_opening_line_number = ();
- $csc_new_statement_ok = 1;
- $csc_last_label = "";
- %csc_block_label = ();
- $rleading_block_if_elsif_text = [];
- $accumulating_text_for_block = "";
- reset_block_text_accumulator();
- return;
- }
+ # loop over all interior chain tokens
+ foreach my $itest ( @{ $interior_chain_type{$type} } ) {
- sub reset_block_text_accumulator {
+ # loop over all left end tokens of same type
+ if ( $left_chain_type{$type} ) {
+ next if $nobreak_to_go[ $itest - 1 ];
+ foreach my $i ( @{ $left_chain_type{$type} } ) {
+ next unless $self->in_same_container_i( $i, $itest );
+ push @insert_list, $itest - 1;
- # save text after 'if' and 'elsif' to append after 'else'
- if ($accumulating_text_for_block) {
+ # Break at matching ? if this : is at a different level.
+ # For example, the ? before $THRf_DEAD in the following
+ # should get a break if its : gets a break.
+ #
+ # my $flags =
+ # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
+ # : ( $_ & 4 ) ? $THRf_R_DETACHED
+ # : $THRf_R_JOINABLE;
+ if ( $type eq ':'
+ && $levels_to_go[$i] != $levels_to_go[$itest] )
+ {
+ my $i_question = $mate_index_to_go[$itest];
+ if ( $i_question > 0 ) {
+ push @insert_list, $i_question - 1;
+ }
+ }
+ last;
+ }
+ }
- if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
- push @{$rleading_block_if_elsif_text}, $leading_block_text;
+ # loop over all right end tokens of same type
+ if ( $right_chain_type{$type} ) {
+ next if $nobreak_to_go[$itest];
+ foreach my $i ( @{ $right_chain_type{$type} } ) {
+ next unless $self->in_same_container_i( $i, $itest );
+ push @insert_list, $itest;
+
+ # break at matching ? if this : is at a different level
+ if ( $type eq ':'
+ && $levels_to_go[$i] != $levels_to_go[$itest] )
+ {
+ my $i_question = $mate_index_to_go[$itest];
+ if ( $i_question >= 0 ) {
+ push @insert_list, $i_question;
+ }
+ }
+ last;
+ }
}
}
- $accumulating_text_for_block = "";
- $leading_block_text = "";
- $leading_block_text_level = 0;
- $leading_block_text_length_exceeded = 0;
- $leading_block_text_line_number = 0;
- $leading_block_text_line_length = 0;
- return;
}
- sub set_block_text_accumulator {
- my ( $self, $i ) = @_;
- $accumulating_text_for_block = $tokens_to_go[$i];
- if ( $accumulating_text_for_block !~ /^els/ ) {
- $rleading_block_if_elsif_text = [];
- }
- $leading_block_text = "";
- $leading_block_text_level = $levels_to_go[$i];
- $leading_block_text_line_number = $self->get_output_line_number();
- $leading_block_text_length_exceeded = 0;
-
- # this will contain the column number of the last character
- # of the closing side comment
- $leading_block_text_line_length =
- length($csc_last_label) +
- length($accumulating_text_for_block) +
- length( $rOpts->{'closing-side-comment-prefix'} ) +
- $leading_block_text_level * $rOpts_indent_columns + 3;
- return;
+ # insert any new break points
+ if (@insert_list) {
+ $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}
+ return;
+}
- sub accumulate_block_text {
- my ( $self, $i ) = @_;
+sub insert_additional_breaks {
- # accumulate leading text for -csc, ignoring any side comments
- if ( $accumulating_text_for_block
- && !$leading_block_text_length_exceeded
- && $types_to_go[$i] ne '#' )
- {
+ # this routine will add line breaks at requested locations after
+ # sub set_continuation_breaks has made preliminary breaks.
- my $added_length = $token_lengths_to_go[$i];
- $added_length += 1 if $i == 0;
- my $new_line_length =
- $leading_block_text_line_length + $added_length;
+ my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_;
+ my $i_f;
+ my $i_l;
+ my $line_number = 0;
+ foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
- # we can add this text if we don't exceed some limits..
- if (
+ next if ( $nobreak_to_go[$i_break_left] );
- # we must not have already exceeded the text length limit
- length($leading_block_text) <
- $rOpts_closing_side_comment_maximum_text
+ $i_f = $ri_first->[$line_number];
+ $i_l = $ri_last->[$line_number];
+ while ( $i_break_left >= $i_l ) {
+ $line_number++;
- # and either:
- # the new total line length must be below the line length limit
- # or the new length must be below the text length limit
- # (ie, we may allow one token to exceed the text length limit)
- && (
- $new_line_length <
- maximum_line_length_for_level($leading_block_text_level)
+ # shouldn't happen unless caller passes bad indexes
+ if ( $line_number >= @{$ri_last} ) {
+ warning(
+"Non-fatal program bug: couldn't set break at $i_break_left\n"
+ );
+ report_definite_bug();
+ return;
+ }
+ $i_f = $ri_first->[$line_number];
+ $i_l = $ri_last->[$line_number];
+ }
- || length($leading_block_text) + $added_length <
- $rOpts_closing_side_comment_maximum_text
- )
+ # Do not leave a blank at the end of a line; back up if necessary
+ if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
- # UNLESS: we are adding a closing paren before the brace we seek.
- # This is an attempt to avoid situations where the ... to be
- # added are longer than the omitted right paren, as in:
+ my $i_break_right = $inext_to_go[$i_break_left];
+ if ( $i_break_left >= $i_f
+ && $i_break_left < $i_l
+ && $i_break_right > $i_f
+ && $i_break_right <= $i_l )
+ {
+ splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
+ splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
+ }
+ }
+ return;
+}
- # foreach my $item (@a_rather_long_variable_name_here) {
- # &whatever;
- # } ## end foreach my $item (@a_rather_long_variable_name_here...
+sub in_same_container_i {
- || (
- $tokens_to_go[$i] eq ')'
- && (
- (
- $i + 1 <= $max_index_to_go
- && $block_type_to_go[ $i + 1 ] eq
- $accumulating_text_for_block
- )
- || ( $i + 2 <= $max_index_to_go
- && $block_type_to_go[ $i + 2 ] eq
- $accumulating_text_for_block )
- )
- )
- )
- {
+ # check to see if tokens at i1 and i2 are in the
+ # same container, and not separated by a comma, ? or :
+ # This is an interface between the _to_go arrays to the rLL array
+ my ( $self, $i1, $i2 ) = @_;
+ return $self->in_same_container_K( $K_to_go[$i1], $K_to_go[$i2] );
+}
- # add an extra space at each newline
- if ( $i == 0 ) { $leading_block_text .= ' ' }
+{ ## begin closure in_same_container_K
+ my $ris_break_token;
+ my $ris_comma_token;
- # add the token text
- $leading_block_text .= $tokens_to_go[$i];
- $leading_block_text_line_length = $new_line_length;
- }
+ BEGIN {
- # show that text was truncated if necessary
- elsif ( $types_to_go[$i] ne 'b' ) {
- $leading_block_text_length_exceeded = 1;
- $leading_block_text .= '...';
- }
- }
- return;
+ # all cases break on seeing commas at same level
+ my @q = qw( => );
+ push @q, ',';
+ @{$ris_comma_token}{@q} = (1) x scalar(@q);
+
+ # Non-ternary text also breaks on seeing any of qw(? : || or )
+ # Example: we would not want to break at any of these .'s
+ # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
+ push @q, qw( or || ? : );
+ @{$ris_break_token}{@q} = (1) x scalar(@q);
}
- sub accumulate_csc_text {
+ sub in_same_container_K {
- my ($self) = @_;
+ # Check to see if tokens at K1 and K2 are in the same container,
+ # and not separated by certain characters: => , ? : || or
+ # This version uses the newer $rLL data structure.
- # called once per output buffer when -csc is used. Accumulates
- # the text placed after certain closing block braces.
- # Defines and returns the following for this buffer:
+ my ( $self, $K1, $K2 ) = @_;
+ if ( $K2 < $K1 ) { ( $K1, $K2 ) = ( $K2, $K1 ) }
+ my $rLL = $self->[_rLL_];
+ my $depth_1 = $rLL->[$K1]->[_SLEVEL_];
+ return if ( $depth_1 < 0 );
+ return unless ( $rLL->[$K2]->[_SLEVEL_] == $depth_1 );
- my $block_leading_text = ""; # the leading text of the last '}'
- my $rblock_leading_if_elsif_text;
- my $i_block_leading_text =
- -1; # index of token owning block_leading_text
- my $block_line_count = 100; # how many lines the block spans
- my $terminal_type = 'b'; # type of last nonblank token
- my $i_terminal = 0; # index of last nonblank token
- my $terminal_block_type = "";
+ # Select character set to scan for
+ my $type_1 = $rLL->[$K1]->[_TYPE_];
+ my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
- # update most recent statement label
- $csc_last_label = "" unless ($csc_last_label);
- if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
- my $block_label = $csc_last_label;
+ # Fast preliminary loop to verify that tokens are in the same container
+ my $KK = $K1;
+ while (1) {
+ $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
+ last if !defined($KK);
+ last if ( $KK >= $K2 );
+ my $depth_K = $rLL->[$KK]->[_SLEVEL_];
+ return if ( $depth_K < $depth_1 );
+ next if ( $depth_K > $depth_1 );
+ if ( $type_1 ne ':' ) {
+ my $tok_K = $rLL->[$KK]->[_TOKEN_];
+ return if ( $tok_K eq '?' || $tok_K eq ':' );
+ }
+ }
- # Loop over all tokens of this batch
- for my $i ( 0 .. $max_index_to_go ) {
- my $type = $types_to_go[$i];
- my $block_type = $block_type_to_go[$i];
- my $token = $tokens_to_go[$i];
+ # Slow loop checking for certain characters
- # remember last nonblank token type
- if ( $type ne '#' && $type ne 'b' ) {
- $terminal_type = $type;
- $terminal_block_type = $block_type;
- $i_terminal = $i;
- }
+ ###########################################################
+ # This is potentially a slow routine and not critical.
+ # For safety just give up for large differences.
+ # See test file 'infinite_loop.txt'
+ ###########################################################
+ return if ( $K2 - $K1 > 200 );
- my $type_sequence = $type_sequence_to_go[$i];
- if ( $block_type && $type_sequence ) {
+ foreach my $K ( $K1 + 1 .. $K2 - 1 ) {
- if ( $token eq '}' ) {
+ my $depth_K = $rLL->[$K]->[_SLEVEL_];
+ next if ( $depth_K > $depth_1 );
+ return if ( $depth_K < $depth_1 ); # redundant, checked above
+ my $tok = $rLL->[$K]->[_TOKEN_];
+ return if ( $rbreak->{$tok} );
+ }
+ return 1;
+ }
+} ## end closure in_same_container_K
- # restore any leading text saved when we entered this block
- if ( defined( $block_leading_text{$type_sequence} ) ) {
- ( $block_leading_text, $rblock_leading_if_elsif_text )
- = @{ $block_leading_text{$type_sequence} };
- $i_block_leading_text = $i;
- delete $block_leading_text{$type_sequence};
- $rleading_block_if_elsif_text =
- $rblock_leading_if_elsif_text;
- }
+sub break_equals {
- if ( defined( $csc_block_label{$type_sequence} ) ) {
- $block_label = $csc_block_label{$type_sequence};
- delete $csc_block_label{$type_sequence};
- }
+ # Look for assignment operators that could use a breakpoint.
+ # For example, in the following snippet
+ #
+ # $HOME = $ENV{HOME}
+ # || $ENV{LOGDIR}
+ # || $pw[7]
+ # || die "no home directory for user $<";
+ #
+ # we could break at the = to get this, which is a little nicer:
+ # $HOME =
+ # $ENV{HOME}
+ # || $ENV{LOGDIR}
+ # || $pw[7]
+ # || die "no home directory for user $<";
+ #
+ # The logic here follows the logic in set_logical_padding, which
+ # will add the padding in the second line to improve alignment.
+ #
+ my ( $self, $ri_left, $ri_right ) = @_;
+ my $nmax = @{$ri_right} - 1;
+ return unless ( $nmax >= 2 );
- # if we run into a '}' then we probably started accumulating
- # at something like a trailing 'if' clause..no harm done.
- if ( $accumulating_text_for_block
- && $levels_to_go[$i] <= $leading_block_text_level )
- {
- my $lev = $levels_to_go[$i];
- reset_block_text_accumulator();
- }
+ # scan the left ends of first two lines
+ my $tokbeg = "";
+ my $depth_beg;
+ for my $n ( 1 .. 2 ) {
+ my $il = $ri_left->[$n];
+ my $typel = $types_to_go[$il];
+ my $tokenl = $tokens_to_go[$il];
- if ( defined( $block_opening_line_number{$type_sequence} ) )
- {
- my $output_line_number =
- $self->get_output_line_number();
- $block_line_count =
- $output_line_number -
- $block_opening_line_number{$type_sequence} + 1;
- delete $block_opening_line_number{$type_sequence};
- }
- else {
+ my $has_leading_op = ( $tokenl =~ /^\w/ )
+ ? $is_chain_operator{$tokenl} # + - * / : ? && ||
+ : $is_chain_operator{$typel}; # and, or
+ return unless ($has_leading_op);
+ if ( $n > 1 ) {
+ return
+ unless ( $tokenl eq $tokbeg
+ && $nesting_depth_to_go[$il] eq $depth_beg );
+ }
+ $tokbeg = $tokenl;
+ $depth_beg = $nesting_depth_to_go[$il];
+ }
- # Error: block opening line undefined for this line..
- # This shouldn't be possible, but it is not a
- # significant problem.
- }
- }
+ # now look for any interior tokens of the same types
+ my $il = $ri_left->[0];
+ my $ir = $ri_right->[0];
- elsif ( $token eq '{' ) {
+ # now make a list of all new break points
+ my @insert_list;
+ for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
+ my $type = $types_to_go[$i];
+ if ( $is_assignment{$type}
+ && $nesting_depth_to_go[$i] eq $depth_beg )
+ {
+ if ( $want_break_before{$type} ) {
+ push @insert_list, $i - 1;
+ }
+ else {
+ push @insert_list, $i;
+ }
+ }
+ }
- my $line_number = $self->get_output_line_number();
- $block_opening_line_number{$type_sequence} = $line_number;
+ # Break after a 'return' followed by a chain of operators
+ # return ( $^O !~ /win32|dos/i )
+ # && ( $^O ne 'VMS' )
+ # && ( $^O ne 'OS2' )
+ # && ( $^O ne 'MacOS' );
+ # To give:
+ # return
+ # ( $^O !~ /win32|dos/i )
+ # && ( $^O ne 'VMS' )
+ # && ( $^O ne 'OS2' )
+ # && ( $^O ne 'MacOS' );
+ my $i = 0;
+ if ( $types_to_go[$i] eq 'k'
+ && $tokens_to_go[$i] eq 'return'
+ && $ir > $il
+ && $nesting_depth_to_go[$i] eq $depth_beg )
+ {
+ push @insert_list, $i;
+ }
- # set a label for this block, except for
- # a bare block which already has the label
- # A label can only be used on the next {
- if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
- $csc_block_label{$type_sequence} = $csc_last_label;
- $csc_last_label = "";
+ return unless (@insert_list);
- if ( $accumulating_text_for_block
- && $levels_to_go[$i] == $leading_block_text_level )
- {
+ # One final check...
+ # scan second and third lines and be sure there are no assignments
+ # we want to avoid breaking at an = to make something like this:
+ # unless ( $icon =
+ # $html_icons{"$type-$state"}
+ # or $icon = $html_icons{$type}
+ # or $icon = $html_icons{$state} )
+ for my $n ( 1 .. 2 ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ foreach my $i ( $il + 1 .. $ir ) {
+ my $type = $types_to_go[$i];
+ return
+ if ( $is_assignment{$type}
+ && $nesting_depth_to_go[$i] eq $depth_beg );
+ }
+ }
- if ( $accumulating_text_for_block eq $block_type ) {
+ # ok, insert any new break point
+ if (@insert_list) {
+ $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ }
+ return;
+}
- # save any leading text before we enter this block
- $block_leading_text{$type_sequence} = [
- $leading_block_text,
- $rleading_block_if_elsif_text
- ];
- $block_opening_line_number{$type_sequence} =
- $leading_block_text_line_number;
- reset_block_text_accumulator();
- }
- else {
+{ ## begin closure recombine_breakpoints
- # shouldn't happen, but not a serious error.
- # We were accumulating -csc text for block type
- # $accumulating_text_for_block and unexpectedly
- # encountered a '{' for block type $block_type.
- }
- }
- }
- }
+ # This routine is called once per batch to see if it would be better
+ # to combine some of the lines into which the batch has been broken.
- if ( $type eq 'k'
- && $csc_new_statement_ok
- && $is_if_elsif_else_unless_while_until_for_foreach{$token}
- && $token =~ /$closing_side_comment_list_pattern/ )
- {
- $self->set_block_text_accumulator($i);
- }
- else {
+ my %is_amp_amp;
+ my %is_ternary;
+ my %is_math_op;
+ my %is_plus_minus;
+ my %is_mult_div;
- # note: ignoring type 'q' because of tricks being played
- # with 'q' for hanging side comments
- if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
- $csc_new_statement_ok =
- ( $block_type || $type eq 'J' || $type eq ';' );
- }
- if ( $type eq ';'
- && $accumulating_text_for_block
- && $levels_to_go[$i] == $leading_block_text_level )
- {
- reset_block_text_accumulator();
- }
- else {
- $self->accumulate_block_text($i);
- }
- }
- }
+ BEGIN {
- # Treat an 'else' block specially by adding preceding 'if' and
- # 'elsif' text. Otherwise, the 'end else' is not helpful,
- # especially for cuddled-else formatting.
- if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
- $block_leading_text =
- $self->make_else_csc_text( $i_terminal, $terminal_block_type,
- $block_leading_text, $rblock_leading_if_elsif_text );
- }
+ my @q;
+ @q = qw( && || );
+ @is_amp_amp{@q} = (1) x scalar(@q);
- # if this line ends in a label then remember it for the next pass
- $csc_last_label = "";
- if ( $terminal_type eq 'J' ) {
- $csc_last_label = $tokens_to_go[$i_terminal];
- }
+ @q = qw( ? : );
+ @is_ternary{@q} = (1) x scalar(@q);
- return ( $terminal_type, $i_terminal, $i_block_leading_text,
- $block_leading_text, $block_line_count, $block_label );
+ @q = qw( + - * / );
+ @is_math_op{@q} = (1) x scalar(@q);
+
+ @q = qw( + - );
+ @is_plus_minus{@q} = (1) x scalar(@q);
+
+ @q = qw( * / );
+ @is_mult_div{@q} = (1) x scalar(@q);
}
- sub make_else_csc_text {
+ sub Debug_dump_breakpoints {
- # create additional -csc text for an 'else' and optionally 'elsif',
- # depending on the value of switch
- #
- # = 0 add 'if' text to trailing else
- # = 1 same as 0 plus:
- # add 'if' to 'elsif's if can fit in line length
- # add last 'elsif' to trailing else if can fit in one line
- # = 2 same as 1 but do not check if exceed line length
- #
- # $rif_elsif_text = a reference to a list of all previous closing
- # side comments created for this if block
- #
- my ( $self, $i_terminal, $block_type, $block_leading_text,
- $rif_elsif_text )
- = @_;
- my $csc_text = $block_leading_text;
+ # Debug routine to dump current breakpoints...not normally called
+ # We are given indexes to the current lines:
+ # $ri_beg = ref to array of BEGinning indexes of each line
+ # $ri_end = ref to array of ENDing indexes of each line
+ my ( $self, $ri_beg, $ri_end, $msg ) = @_;
+ print STDERR "----Dumping breakpoints from: $msg----\n";
+ for my $n ( 0 .. @{$ri_end} - 1 ) {
+ my $ibeg = $ri_beg->[$n];
+ my $iend = $ri_end->[$n];
+ my $text = "";
+ foreach my $i ( $ibeg .. $iend ) {
+ $text .= $tokens_to_go[$i];
+ }
+ print STDERR "$n ($ibeg:$iend) $text\n";
+ }
+ print STDERR "----\n";
+ return;
+ }
- my $rOpts_closing_side_comment_else_flag =
- $rOpts->{'closing-side-comment-else-flag'};
+ sub delete_one_line_semicolons {
- if ( $block_type eq 'elsif'
- && $rOpts_closing_side_comment_else_flag == 0 )
- {
- return $csc_text;
- }
+ my ( $self, $ri_beg, $ri_end ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $K_opening_container = $self->[_K_opening_container_];
- my $count = @{$rif_elsif_text};
- return $csc_text unless ($count);
+ # Walk down the lines of this batch and delete any semicolons
+ # terminating one-line blocks;
+ my $nmax = @{$ri_end} - 1;
- my $if_text = '[ if' . $rif_elsif_text->[0];
+ foreach my $n ( 0 .. $nmax ) {
+ my $i_beg = $ri_beg->[$n];
+ my $i_e = $ri_end->[$n];
+ my $K_beg = $K_to_go[$i_beg];
+ my $K_e = $K_to_go[$i_e];
+ my $K_end = $K_e;
+ my $type_end = $rLL->[$K_end]->[_TYPE_];
+ if ( $type_end eq '#' ) {
+ $K_end = $self->K_previous_nonblank($K_end);
+ if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
+ }
- # always show the leading 'if' text on 'else'
- if ( $block_type eq 'else' ) {
- $csc_text .= $if_text;
- }
+ # we are looking for a line ending in closing brace
+ next
+ unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
- # see if that's all
- if ( $rOpts_closing_side_comment_else_flag == 0 ) {
- return $csc_text;
- }
+ # ...and preceded by a semicolon on the same line
+ my $K_semicolon = $self->K_previous_nonblank($K_end);
+ next unless defined($K_semicolon);
+ my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
+ next if ( $i_semicolon <= $i_beg );
+ next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
- my $last_elsif_text = "";
- if ( $count > 1 ) {
- $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
- if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
- }
+ # safety check - shouldn't happen
+ if ( $types_to_go[$i_semicolon] ne ';' ) {
+ Fault("unexpected type looking for semicolon, ignoring");
+ next;
+ }
- # tentatively append one more item
- my $saved_text = $csc_text;
- if ( $block_type eq 'else' ) {
- $csc_text .= $last_elsif_text;
- }
- else {
- $csc_text .= ' ' . $if_text;
- }
+ # ... with the corresponding opening brace on the same line
+ my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
+ my $K_opening = $K_opening_container->{$type_sequence};
+ next unless ( defined($K_opening) );
+ my $i_opening = $i_beg + ( $K_opening - $K_beg );
+ next if ( $i_opening < $i_beg );
- # all done if no length checks requested
- if ( $rOpts_closing_side_comment_else_flag == 2 ) {
- return $csc_text;
- }
+ # ... and only one semicolon between these braces
+ my $semicolon_count = 0;
+ foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
+ if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
+ $semicolon_count++;
+ last;
+ }
+ }
+ next if ($semicolon_count);
- # undo it if line length exceeded
- my $length =
- length($csc_text) +
- length($block_type) +
- length( $rOpts->{'closing-side-comment-prefix'} ) +
- $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
- if (
- $length > maximum_line_length_for_level($leading_block_text_level) )
- {
- $csc_text = $saved_text;
+ # ...ok, then make the semicolon invisible
+ $tokens_to_go[$i_semicolon] = "";
+ $token_lengths_to_go[$i_semicolon] = 0;
+ $rLL->[$K_semicolon]->[_TOKEN_] = "";
+ $rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
}
- return $csc_text;
+ return;
}
-} ## end closure accumulate_csc_text
-
-{ ## begin closure balance_csc_text
-
- # Some additional routines for handling the --closing-side-comments option
- my %matching_char;
+ sub unmask_phantom_semicolons {
- BEGIN {
- %matching_char = (
- '{' => '}',
- '(' => ')',
- '[' => ']',
- '}' => '{',
- ')' => '(',
- ']' => '[',
- );
- }
+ my ( $self, $ri_beg, $ri_end ) = @_;
- sub balance_csc_text {
+ # Walk down the lines of this batch and unmask any invisible line-ending
+ # semicolons. They were placed by sub respace_tokens but we only now
+ # know if we actually need them.
+ my $rLL = $self->[_rLL_];
- # Append characters to balance a closing side comment so that editors
- # such as vim can correctly jump through code.
- # Simple Example:
- # input = ## end foreach my $foo ( sort { $b ...
- # output = ## end foreach my $foo ( sort { $b ...})
+ my $nmax = @{$ri_end} - 1;
+ foreach my $n ( 0 .. $nmax ) {
- # NOTE: This routine does not currently filter out structures within
- # quoted text because the bounce algorithms in text editors do not
- # necessarily do this either (a version of vim was checked and
- # did not do this).
+ my $i = $ri_end->[$n];
+ if ( $types_to_go[$i] eq ';' && $tokens_to_go[$i] eq '' ) {
- # Some complex examples which will cause trouble for some editors:
- # while ( $mask_string =~ /\{[^{]*?\}/g ) {
- # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
- # if ( $1 eq '{' ) {
- # test file test1/braces.pl has many such examples.
+ my $tok = ';';
+ my $tok_len = 1;
+ if ( $want_left_space{';'} != WS_NO ) {
+ $tok = ' ;';
+ $tok_len = 2;
+ }
+ $tokens_to_go[$i] = $tok;
+ $token_lengths_to_go[$i] = $tok_len;
+ my $KK = $K_to_go[$i];
+ $rLL->[$KK]->[_TOKEN_] = $tok;
+ $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
+ my $line_number = 1 + $self->get_old_line_index($KK);
+ $self->note_added_semicolon($line_number);
+ }
+ }
+ return;
+ }
- my ($csc) = @_;
+ sub recombine_breakpoints {
- # loop to examine characters one-by-one, RIGHT to LEFT and
- # build a balancing ending, LEFT to RIGHT.
- for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
+ # sub set_continuation_breaks is very liberal in setting line breaks
+ # for long lines, always setting breaks at good breakpoints, even
+ # when that creates small lines. Sometimes small line fragments
+ # are produced which would look better if they were combined.
+ # That's the task of this routine.
+ #
+ # We are given indexes to the current lines:
+ # $ri_beg = ref to array of BEGinning indexes of each line
+ # $ri_end = ref to array of ENDing indexes of each line
+ my ( $self, $ri_beg, $ri_end ) = @_;
- my $char = substr( $csc, $pos, 1 );
+ my $rOpts_short_concatenation_item_length =
+ $rOpts->{'short-concatenation-item-length'};
+ my $rOpts_break_at_old_semicolon_breakpoints =
+ $rOpts->{'break-at-old-semicolon-breakpoints'};
- # ignore everything except structural characters
- next unless ( $matching_char{$char} );
+ # Make a list of all good joining tokens between the lines
+ # n-1 and n.
+ my @joint;
+ my $nmax = @{$ri_end} - 1;
+ for my $n ( 1 .. $nmax ) {
+ my $ibeg_1 = $ri_beg->[ $n - 1 ];
+ my $iend_1 = $ri_end->[ $n - 1 ];
+ my $iend_2 = $ri_end->[$n];
+ my $ibeg_2 = $ri_beg->[$n];
- # pop most recently appended character
- my $top = chop($csc);
+ my ( $itok, $itokp, $itokm );
- # push it back plus the mate to the newest character
- # unless they balance each other.
- $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
+ foreach my $itest ( $iend_1, $ibeg_2 ) {
+ my $type = $types_to_go[$itest];
+ if ( $is_math_op{$type}
+ || $is_amp_amp{$type}
+ || $is_assignment{$type}
+ || $type eq ':' )
+ {
+ $itok = $itest;
+ }
+ }
+ $joint[$n] = [$itok];
}
- # return the balanced string
- return $csc;
- }
-} ## end closure balance_csc_text
+ my $more_to_do = 1;
-sub add_closing_side_comment {
+ # We keep looping over all of the lines of this batch
+ # until there are no more possible recombinations
+ my $nmax_last = @{$ri_end};
+ my $reverse = 0;
+ while ($more_to_do) {
+ my $n_best = 0;
+ my $bs_best;
+ my $nmax = @{$ri_end} - 1;
- my $self = shift;
- my $rLL = $self->[_rLL_];
+ # Safety check for infinite loop
+ unless ( $nmax < $nmax_last ) {
- # add closing side comments after closing block braces if -csc used
- my ( $closing_side_comment, $cscw_block_comment );
+ # Shouldn't happen because splice below decreases nmax on each
+ # pass.
+ Fault("Program bug-infinite loop in recombine breakpoints\n");
+ }
+ $nmax_last = $nmax;
+ $more_to_do = 0;
+ my $skip_Section_3;
+ my $leading_amp_count = 0;
+ my $this_line_is_semicolon_terminated;
- #---------------------------------------------------------------
- # Step 1: loop through all tokens of this line to accumulate
- # the text needed to create the closing side comments. Also see
- # how the line ends.
- #---------------------------------------------------------------
+ # loop over all remaining lines in this batch
+ for my $iter ( 1 .. $nmax ) {
- my ( $terminal_type, $i_terminal, $i_block_leading_text,
- $block_leading_text, $block_line_count, $block_label )
- = $self->accumulate_csc_text();
+ # alternating sweep direction gives symmetric results
+ # for recombining lines which exceed the line length
+ # such as eval {{{{.... }}}}
+ my $n;
+ if ($reverse) { $n = 1 + $nmax - $iter; }
+ else { $n = $iter }
- #---------------------------------------------------------------
- # Step 2: make the closing side comment if this ends a block
- #---------------------------------------------------------------
- my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
+ #----------------------------------------------------------
+ # If we join the current pair of lines,
+ # line $n-1 will become the left part of the joined line
+ # line $n will become the right part of the joined line
+ #
+ # Here are Indexes of the endpoint tokens of the two lines:
+ #
+ # -----line $n-1--- | -----line $n-----
+ # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
+ # ^
+ # |
+ # We want to decide if we should remove the line break
+ # between the tokens at $iend_1 and $ibeg_2
+ #
+ # We will apply a number of ad-hoc tests to see if joining
+ # here will look ok. The code will just issue a 'next'
+ # command if the join doesn't look good. If we get through
+ # the gauntlet of tests, the lines will be recombined.
+ #----------------------------------------------------------
+ #
+ # beginning and ending tokens of the lines we are working on
+ my $ibeg_1 = $ri_beg->[ $n - 1 ];
+ my $iend_1 = $ri_end->[ $n - 1 ];
+ my $iend_2 = $ri_end->[$n];
+ my $ibeg_2 = $ri_beg->[$n];
+ my $ibeg_nmax = $ri_beg->[$nmax];
- # if this line might end in a block closure..
- if (
- $terminal_type eq '}'
+ # combined line cannot be too long
+ my $excess =
+ $self->excess_line_length( $ibeg_1, $iend_2, 1, 1 );
+ next if ( $excess > 0 );
- # ..and either
- && (
+ my $type_iend_1 = $types_to_go[$iend_1];
+ my $type_iend_2 = $types_to_go[$iend_2];
+ my $type_ibeg_1 = $types_to_go[$ibeg_1];
+ my $type_ibeg_2 = $types_to_go[$ibeg_2];
- # the block is long enough
- ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
+ # terminal token of line 2 if any side comment is ignored:
+ my $iend_2t = $iend_2;
+ my $type_iend_2t = $type_iend_2;
- # or there is an existing comment to check
- || ( $have_side_comment
- && $rOpts->{'closing-side-comment-warnings'} )
- )
+ # some beginning indexes of other lines, which may not exist
+ my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
+ my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
+ my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
- # .. and if this is one of the types of interest
- && $block_type_to_go[$i_terminal] =~
- /$closing_side_comment_list_pattern/
+ my $bs_tweak = 0;
- # .. but not an anonymous sub
- # These are not normally of interest, and their closing braces are
- # often followed by commas or semicolons anyway. This also avoids
- # possible erratic output due to line numbering inconsistencies
- # in the cases where their closing braces terminate a line.
- && $block_type_to_go[$i_terminal] ne 'sub'
+ #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
+ # $nesting_depth_to_go[$ibeg_1] );
- # ..and the corresponding opening brace must is not in this batch
- # (because we do not need to tag one-line blocks, although this
- # should also be caught with a positive -csci value)
- && $self->mate_index_to_go($i_terminal) < 0
+ 0 && do {
+ print STDERR
+"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
+ };
- # ..and either
- && (
+ # If line $n is the last line, we set some flags and
+ # do any special checks for it
+ if ( $n == $nmax ) {
- # this is the last token (line doesn't have a side comment)
- !$have_side_comment
+ next
+ if ( $type_ibeg_2 eq ';'
+ && $rOpts_break_at_old_semicolon_breakpoints );
- # or the old side comment is a closing side comment
- || $tokens_to_go[$max_index_to_go] =~
- /$closing_side_comment_prefix_pattern/
- )
- )
- {
+ # a terminal '{' should stay where it is
+ # unless preceded by a fat comma
+ next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' );
- # then make the closing side comment text
- if ($block_label) { $block_label .= " " }
- my $token =
-"$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
+ if ( $type_iend_2 eq '#'
+ && $iend_2 - $ibeg_2 >= 2
+ && $types_to_go[ $iend_2 - 1 ] eq 'b' )
+ {
+ $iend_2t = $iend_2 - 2;
+ $type_iend_2t = $types_to_go[$iend_2t];
+ }
- # append any extra descriptive text collected above
- if ( $i_block_leading_text == $i_terminal ) {
- $token .= $block_leading_text;
- }
+ $this_line_is_semicolon_terminated = $type_iend_2t eq ';';
+ }
- $token = balance_csc_text($token)
- if $rOpts->{'closing-side-comments-balanced'};
+ #----------------------------------------------------------
+ # Recombine Section 0:
+ # Examine the special token joining this line pair, if any.
+ # Put as many tests in this section to avoid duplicate code and
+ # to make formatting independent of whether breaks are to the
+ # left or right of an operator.
+ #----------------------------------------------------------
- $token =~ s/\s*$//; # trim any trailing whitespace
+ my ($itok) = @{ $joint[$n] };
+ if ($itok) {
- # handle case of existing closing side comment
- if ($have_side_comment) {
+ # FIXME: Patch - may not be necessary
+ my $iend_1 =
+ $type_iend_1 eq 'b'
+ ? $iend_1 - 1
+ : $iend_1;
- # warn if requested and tokens differ significantly
- if ( $rOpts->{'closing-side-comment-warnings'} ) {
- my $old_csc = $tokens_to_go[$max_index_to_go];
- my $new_csc = $token;
- $new_csc =~ s/\s+//g; # trim all whitespace
- $old_csc =~ s/\s+//g; # trim all whitespace
- $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
- $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
- $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
- my $new_trailing_dots = $1;
- $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
+ my $iend_2 =
+ $type_iend_2 eq 'b'
+ ? $iend_2 - 1
+ : $iend_2;
+ ## END PATCH
- # Patch to handle multiple closing side comments at
- # else and elsif's. These have become too complicated
- # to check, so if we see an indication of
- # '[ if' or '[ # elsif', then assume they were made
- # by perltidy.
- if ( $block_type_to_go[$i_terminal] eq 'else' ) {
- if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
- }
- elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
- if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
- }
+ my $type = $types_to_go[$itok];
- # if old comment is contained in new comment,
- # only compare the common part.
- if ( length($new_csc) > length($old_csc) ) {
- $new_csc = substr( $new_csc, 0, length($old_csc) );
- }
+ if ( $type eq ':' ) {
- # if the new comment is shorter and has been limited,
- # only compare the common part.
- if ( length($new_csc) < length($old_csc)
- && $new_trailing_dots )
- {
- $old_csc = substr( $old_csc, 0, length($new_csc) );
- }
+ # do not join at a colon unless it disobeys the break request
+ if ( $itok eq $iend_1 ) {
+ next unless $want_break_before{$type};
+ }
+ else {
+ $leading_amp_count++;
+ next if $want_break_before{$type};
+ }
+ } ## end if ':'
- # any remaining difference?
- if ( $new_csc ne $old_csc ) {
+ # handle math operators + - * /
+ elsif ( $is_math_op{$type} ) {
- # just leave the old comment if we are below the threshold
- # for creating side comments
- if ( $block_line_count <
- $rOpts->{'closing-side-comment-interval'} )
- {
- $token = undef;
- }
+ # Combine these lines if this line is a single
+ # number, or if it is a short term with same
+ # operator as the previous line. For example, in
+ # the following code we will combine all of the
+ # short terms $A, $B, $C, $D, $E, $F, together
+ # instead of leaving them one per line:
+ # my $time =
+ # $A * $B * $C * $D * $E * $F *
+ # ( 2. * $eps * $sigma * $area ) *
+ # ( 1. / $tcold**3 - 1. / $thot**3 );
- # otherwise we'll make a note of it
- else {
+ # This can be important in math-intensive code.
- warning(
-"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
- );
+ my $good_combo;
- # save the old side comment in a new trailing block
- # comment
- my $timestamp = "";
- if ( $rOpts->{'timestamp'} ) {
- my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
- $year += 1900;
- $month += 1;
- $timestamp = "$year-$month-$day";
- }
- $cscw_block_comment =
-"## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
-## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
- }
- }
- else {
+ my $itokp = min( $inext_to_go[$itok], $iend_2 );
+ my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
+ my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
+ my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
- # No differences.. we can safely delete old comment if we
- # are below the threshold
- if ( $block_line_count <
- $rOpts->{'closing-side-comment-interval'} )
- {
- $token = undef;
- $self->unstore_token_to_go()
- if ( $types_to_go[$max_index_to_go] eq '#' );
- $self->unstore_token_to_go()
- if ( $types_to_go[$max_index_to_go] eq 'b' );
- }
- }
- }
+ # check for a number on the right
+ if ( $types_to_go[$itokp] eq 'n' ) {
- # switch to the new csc (unless we deleted it!)
- if ($token) {
- $tokens_to_go[$max_index_to_go] = $token;
- my $K = $K_to_go[$max_index_to_go];
- $rLL->[$K]->[_TOKEN_] = $token;
- $rLL->[$K]->[_TOKEN_LENGTH_] =
- length($token); # NOTE: length no longer important
- }
- }
+ # ok if nothing else on right
+ if ( $itokp == $iend_2 ) {
+ $good_combo = 1;
+ }
+ else {
- # handle case of NO existing closing side comment
- else {
+ # look one more token to right..
+ # okay if math operator or some termination
+ $good_combo =
+ ( ( $itokpp == $iend_2 )
+ && $is_math_op{ $types_to_go[$itokpp] } )
+ || $types_to_go[$itokpp] =~ /^[#,;]$/;
+ }
+ }
- # To avoid inserting a new token in the token arrays, we
- # will just return the new side comment so that it can be
- # inserted just before it is needed in the call to the
- # vertical aligner.
- $closing_side_comment = $token;
- }
- }
- return ( $closing_side_comment, $cscw_block_comment );
-}
+ # check for a number on the left
+ if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
-sub make_paren_name {
- my ( $self, $i ) = @_;
+ # okay if nothing else to left
+ if ( $itokm == $ibeg_1 ) {
+ $good_combo = 1;
+ }
- # The token at index $i is a '('.
- # Create an alignment name for it to avoid incorrect alignments.
+ # otherwise look one more token to left
+ else {
- # Start with the name of the previous nonblank token...
- my $name = "";
- my $im = $i - 1;
- return "" if ( $im < 0 );
- if ( $types_to_go[$im] eq 'b' ) { $im--; }
- return "" if ( $im < 0 );
- $name = $tokens_to_go[$im];
+ # okay if math operator, comma, or assignment
+ $good_combo = ( $itokmm == $ibeg_1 )
+ && ( $is_math_op{ $types_to_go[$itokmm] }
+ || $types_to_go[$itokmm] =~ /^[,]$/
+ || $is_assignment{ $types_to_go[$itokmm] }
+ );
+ }
+ }
- # Prepend any sub name to an isolated -> to avoid unwanted alignments
- # [test case is test8/penco.pl]
- if ( $name eq '->' ) {
- $im--;
- if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
- $name = $tokens_to_go[$im] . $name;
- }
- }
+ # look for a single short token either side of the
+ # operator
+ if ( !$good_combo ) {
- # Finally, remove any leading arrows
- $name =~ s/^->//;
- return $name;
-}
+ # Slight adjustment factor to make results
+ # independent of break before or after operator in
+ # long summed lists. (An operator and a space make
+ # two spaces).
+ my $two = ( $itok eq $iend_1 ) ? 2 : 0;
-sub send_lines_to_vertical_aligner {
+ $good_combo =
- my ($self) = @_;
+ # numbers or id's on both sides of this joint
+ $types_to_go[$itokp] =~ /^[in]$/
+ && $types_to_go[$itokm] =~ /^[in]$/
- # This routine receives a batch of code for which the final line breaks
- # have been defined. Here we prepare the lines for passing to the vertical
- # aligner. We do the following tasks:
- # - mark certain vertical alignment tokens, such as '=', in each line
- # - make minor indentation adjustments
- # - do logical padding: insert extra blank spaces to help display certain
- # logical constructions
+ # one of the two lines must be short:
+ && (
+ (
+ # no more than 2 nonblank tokens right of
+ # joint
+ $itokpp == $iend_2
- my $this_batch = $self->[_this_batch_];
- my $rlines_K = $this_batch->[_rlines_K_];
- if ( !@{$rlines_K} ) {
- Fault("Unexpected call with no lines");
- return;
- }
- my $n_last_line = @{$rlines_K} - 1;
+ # short
+ && token_sequence_length( $itokp, $iend_2 )
+ < $two +
+ $rOpts_short_concatenation_item_length
+ )
+ || (
+ # no more than 2 nonblank tokens left of
+ # joint
+ $itokmm == $ibeg_1
- my $do_not_pad = $this_batch->[_do_not_pad_];
- my $peak_batch_size = $this_batch->[_peak_batch_size_];
- my $starting_in_quote = $this_batch->[_starting_in_quote_];
- my $ending_in_quote = $this_batch->[_ending_in_quote_];
- my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
- my $ibeg0 = $this_batch->[_ibeg0_];
- my $rK_to_go = $this_batch->[_rK_to_go_];
- my $batch_count = $this_batch->[_batch_count_];
+ # short
+ && token_sequence_length( $ibeg_1, $itokm )
+ < 2 - $two +
+ $rOpts_short_concatenation_item_length
+ )
- my $rLL = $self->[_rLL_];
- my $Klimit = $self->[_Klimit_];
+ )
- my ( $Kbeg_next, $Kend_next ) = @{ $rlines_K->[0] };
- my $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_];
- my $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
- my $type_end_next = $rLL->[$Kend_next]->[_TYPE_];
+ # keep pure terms; don't mix +- with */
+ && !(
+ $is_plus_minus{$type}
+ && ( $is_mult_div{ $types_to_go[$itokmm] }
+ || $is_mult_div{ $types_to_go[$itokpp] } )
+ )
+ && !(
+ $is_mult_div{$type}
+ && ( $is_plus_minus{ $types_to_go[$itokmm] }
+ || $is_plus_minus{ $types_to_go[$itokpp] } )
+ )
- # Construct indexes to the global_to_go arrays so that called routines can
- # still access those arrays. This might eventually be removed
- # when all called routines have been converted to access token values
- # in the rLL array instead.
- my $Kbeg0 = $Kbeg_next;
- my ( $ri_first, $ri_last );
- foreach my $rline ( @{$rlines_K} ) {
- my ( $Kbeg, $Kend ) = @{$rline};
- my $ibeg = $ibeg0 + $Kbeg - $Kbeg0;
- my $iend = $ibeg0 + $Kend - $Kbeg0;
- push @{$ri_first}, $ibeg;
- push @{$ri_last}, $iend;
- }
+ ;
+ }
- my ( $cscw_block_comment, $closing_side_comment );
- if ( $rOpts->{'closing-side-comments'} ) {
- ( $closing_side_comment, $cscw_block_comment ) =
- $self->add_closing_side_comment();
- }
+ # it is also good to combine if we can reduce to 2 lines
+ if ( !$good_combo ) {
- my $rindentation_list = [0]; # ref to indentations for each line
+ # index on other line where same token would be in a
+ # long chain.
+ my $iother =
+ ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
- # define the array @{$ralignment_type_to_go} for the output tokens
- # which will be non-blank for each special token (such as =>)
- # for which alignment is required.
- my $ralignment_type_to_go =
- $self->set_vertical_alignment_markers( $ri_first, $ri_last );
+ $good_combo =
+ $n == 2
+ && $n == $nmax
+ && $types_to_go[$iother] ne $type;
+ }
- # flush before a long if statement to avoid unwanted alignment
- if ( $n_last_line > 0
- && $type_beg_next eq 'k'
- && $token_beg_next =~ /^(if|unless)$/ )
- {
- $self->flush_vertical_aligner();
- }
+ next unless ($good_combo);
- $self->undo_ci( $ri_first, $ri_last );
+ } ## end math
- $self->set_logical_padding( $ri_first, $ri_last, $peak_batch_size,
- $starting_in_quote )
- if ( $rOpts->{'logical-padding'} );
+ elsif ( $is_amp_amp{$type} ) {
+ ##TBD
+ } ## end &&, ||
- # loop to prepare each line for shipment
- my $in_comma_list;
- my ( $Kbeg, $type_beg, $token_beg );
- my ( $Kend, $type_end );
- for my $n ( 0 .. $n_last_line ) {
+ elsif ( $is_assignment{$type} ) {
+ ##TBD
+ } ## end assignment
+ }
- my $ibeg = $ri_first->[$n];
- my $iend = $ri_last->[$n];
- my $rline = $rlines_K->[$n];
- my $forced_breakpoint = $rline->[2];
+ #----------------------------------------------------------
+ # Recombine Section 1:
+ # Join welded nested containers immediately
+ #----------------------------------------------------------
+ if ( $self->weld_len_right_to_go($iend_1)
+ || $self->weld_len_left_to_go($ibeg_2) )
+ {
+ $n_best = $n;
- # we may need to look at variables on three consecutive lines ...
+ # Old coding alternated sweep direction: no longer needed
+ # $reverse = 1 - $reverse;
+ last;
+ }
+ $reverse = 0;
- # Some vars on line [n-1], if any:
- my $Kbeg_last = $Kbeg;
- my $type_beg_last = $type_beg;
- my $token_beg_last = $token_beg;
- my $Kend_last = $Kend;
- my $type_end_last = $type_end;
+ #----------------------------------------------------------
+ # Recombine Section 2:
+ # Examine token at $iend_1 (right end of first line of pair)
+ #----------------------------------------------------------
- # Some vars on line [n]:
- $Kbeg = $Kbeg_next;
- $type_beg = $type_beg_next;
- $token_beg = $token_beg_next;
- $Kend = $Kend_next;
- $type_end = $type_end_next;
+ # an isolated '}' may join with a ';' terminated segment
+ if ( $type_iend_1 eq '}' ) {
- # We use two slightly different definitions of level jump at the end
- # of line:
- # $ljump is the level jump needed by 'sub set_adjusted_indentation'
- # $level_jump is the level jump needed by the vertical aligner.
- my $ljump = 0; # level jump at end of line
+ # Check for cases where combining a semicolon terminated
+ # statement with a previous isolated closing paren will
+ # allow the combined line to be outdented. This is
+ # generally a good move. For example, we can join up
+ # the last two lines here:
+ # (
+ # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
+ # $size, $atime, $mtime, $ctime, $blksize, $blocks
+ # )
+ # = stat($file);
+ #
+ # to get:
+ # (
+ # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
+ # $size, $atime, $mtime, $ctime, $blksize, $blocks
+ # ) = stat($file);
+ #
+ # which makes the parens line up.
+ #
+ # Another example, from Joe Matarazzo, probably looks best
+ # with the 'or' clause appended to the trailing paren:
+ # $self->some_method(
+ # PARAM1 => 'foo',
+ # PARAM2 => 'bar'
+ # ) or die "Some_method didn't work";
+ #
+ # But we do not want to do this for something like the -lp
+ # option where the paren is not outdentable because the
+ # trailing clause will be far to the right.
+ #
+ # The logic here is synchronized with the logic in sub
+ # sub set_adjusted_indentation, which actually does
+ # the outdenting.
+ #
+ $skip_Section_3 ||= $this_line_is_semicolon_terminated
- # Get some vars on line [n+1], if any:
- if ( $n < $n_last_line ) {
- ( $Kbeg_next, $Kend_next ) =
- @{ $rlines_K->[ $n + 1 ] };
- $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_];
- $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
- $type_end_next = $rLL->[$Kend_next]->[_TYPE_];
- $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
- }
+ # only one token on last line
+ && $ibeg_1 == $iend_1
- # level jump at end of line for the vertical aligner:
- my $level_jump =
- $Kend >= $Klimit
- ? 0
- : $rLL->[ $Kend + 1 ]->[_SLEVEL_] - $rLL->[$Kbeg]->[_SLEVEL_];
+ # must be structural paren
+ && $tokens_to_go[$iend_1] eq ')'
- $self->delete_needless_alignments( $ibeg, $iend,
- $ralignment_type_to_go );
+ # style must allow outdenting,
+ && !$closing_token_indentation{')'}
- my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
- $self->make_alignment_patterns( $ibeg, $iend,
- $ralignment_type_to_go );
+ # only leading '&&', '||', and ':' if no others seen
+ # (but note: our count made below could be wrong
+ # due to intervening comments)
+ && ( $leading_amp_count == 0
+ || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
- my ( $indentation, $lev, $level_end, $terminal_type,
- $terminal_block_type, $is_semicolon_terminated, $is_outdented_line )
- = $self->set_adjusted_indentation( $ibeg, $iend, $rfields,
- $rpatterns, $ri_first, $ri_last,
- $rindentation_list, $ljump, $starting_in_quote,
- $is_static_block_comment, );
+ # but leading colons probably line up with a
+ # previous colon or question (count could be wrong).
+ && $type_ibeg_2 ne ':'
- # we will allow outdenting of long lines..
- my $outdent_long_lines = (
+ # only one step in depth allowed. this line must not
+ # begin with a ')' itself.
+ && ( $nesting_depth_to_go[$iend_1] ==
+ $nesting_depth_to_go[$iend_2] + 1 );
- # which are long quotes, if allowed
- ( $type_beg eq 'Q' && $rOpts->{'outdent-long-quotes'} )
+ # YVES patch 2 of 2:
+ # Allow cuddled eval chains, like this:
+ # eval {
+ # #STUFF;
+ # 1; # return true
+ # } or do {
+ # #handle error
+ # };
+ # This patch works together with a patch in
+ # setting adjusted indentation (where the closing eval
+ # brace is outdented if possible).
+ # The problem is that an 'eval' block has continuation
+ # indentation and it looks better to undo it in some
+ # cases. If we do not use this patch we would get:
+ # eval {
+ # #STUFF;
+ # 1; # return true
+ # }
+ # or do {
+ # #handle error
+ # };
+ # The alternative, for uncuddled style, is to create
+ # a patch in set_adjusted_indentation which undoes
+ # the indentation of a leading line like 'or do {'.
+ # This doesn't work well with -icb through
+ if (
+ $block_type_to_go[$iend_1] eq 'eval'
+ && !$rOpts->{'line-up-parentheses'}
+ && !$rOpts->{'indent-closing-brace'}
+ && $tokens_to_go[$iend_2] eq '{'
+ && (
+ ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ )
+ || ( $type_ibeg_2 eq 'k'
+ && $is_and_or{ $tokens_to_go[$ibeg_2] } )
+ || $is_if_unless{ $tokens_to_go[$ibeg_2] }
+ )
+ )
+ {
+ $skip_Section_3 ||= 1;
+ }
- # which are long block comments, if allowed
- || (
- $type_beg eq '#'
- && $rOpts->{'outdent-long-comments'}
+ next
+ unless (
+ $skip_Section_3
- # but not if this is a static block comment
- && !$is_static_block_comment
- )
- );
+ # handle '.' and '?' specially below
+ || ( $type_ibeg_2 =~ /^[\.\?]$/ )
+ );
+ }
- my $rvertical_tightness_flags =
- $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
- $ri_first, $ri_last, $ending_in_quote, $closing_side_comment );
+ elsif ( $type_iend_1 eq '{' ) {
- # flush an outdented line to avoid any unwanted vertical alignment
- $self->flush_vertical_aligner() if ($is_outdented_line);
+ # YVES
+ # honor breaks at opening brace
+ # Added to prevent recombining something like this:
+ # } || eval { package main;
+ next if $forced_breakpoint_to_go[$iend_1];
+ }
- # Set a flag at the final ':' of a ternary chain to request
- # vertical alignment of the final term. Here is a
- # slightly complex example:
- #
- # $self->{_text} = (
- # !$section ? ''
- # : $type eq 'item' ? "the $section entry"
- # : "the section on $section"
- # )
- # . (
- # $page
- # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
- # : ' elsewhere in this document'
- # );
- #
- my $is_terminal_ternary = 0;
+ # do not recombine lines with ending &&, ||,
+ elsif ( $is_amp_amp{$type_iend_1} ) {
+ next unless $want_break_before{$type_iend_1};
+ }
- if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) {
- my $last_leading_type = $n > 0 ? $type_beg_last : ':';
- if ( $terminal_type ne ';'
- && $n_last_line > $n
- && $level_end == $lev )
- {
- $level_end = $rLL->[$Kbeg_next]->[_LEVEL_];
- $terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
- }
- if (
- $last_leading_type eq ':'
- && ( ( $terminal_type eq ';' && $level_end <= $lev )
- || ( $terminal_type ne ':' && $level_end < $lev ) )
- )
- {
+ # Identify and recombine a broken ?/: chain
+ elsif ( $type_iend_1 eq '?' ) {
- # the terminal term must not contain any ternary terms, as in
- # my $ECHO = (
- # $Is_MSWin32 ? ".\\echo$$"
- # : $Is_MacOS ? ":echo$$"
- # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
- # );
- $is_terminal_ternary = 1;
+ # Do not recombine different levels
+ next
+ if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
- my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_];
- while ( defined($KP) && $KP <= $Kend ) {
- my $type_KP = $rLL->[$KP]->[_TYPE_];
- if ( $type_KP eq '?' || $type_KP eq ':' ) {
- $is_terminal_ternary = 0;
- last;
- }
- $KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_];
+ # do not recombine unless next line ends in :
+ next unless $type_iend_2 eq ':';
}
- }
- }
- my $level_adj = $lev;
- my $radjusted_levels = $self->[_radjusted_levels_];
- if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
- $level_adj = $radjusted_levels->[$Kbeg];
- if ( $level_adj < 0 ) { $level_adj = 0 }
- }
+ # for lines ending in a comma...
+ elsif ( $type_iend_1 eq ',' ) {
- # add any new closing side comment to the last line
- if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
- $rfields->[-1] .= " $closing_side_comment";
+ # Do not recombine at comma which is following the
+ # input bias.
+ # TODO: might be best to make a special flag
+ next if ( $old_breakpoint_to_go[$iend_1] );
- # NOTE: Patch for csc. We can just use 1 for the length of the csc
- # because its length should not be a limiting factor from here on.
- $rfield_lengths->[-1] += 2;
- }
+ # an isolated '},' may join with an identifier + ';'
+ # this is useful for the class of a 'bless' statement (bless.t)
+ if ( $type_ibeg_1 eq '}'
+ && $type_ibeg_2 eq 'i' )
+ {
+ next
+ unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
+ && ( $iend_2 == ( $ibeg_2 + 1 ) )
+ && $this_line_is_semicolon_terminated );
- # send this new line down the pipe
- my $rvalign_hash = {};
- $rvalign_hash->{level} = $lev;
- $rvalign_hash->{level_end} = $level_end;
- $rvalign_hash->{level_adj} = $level_adj;
- $rvalign_hash->{indentation} = $indentation;
- $rvalign_hash->{is_forced_break} = $forced_breakpoint || $in_comma_list;
- $rvalign_hash->{outdent_long_lines} = $outdent_long_lines;
- $rvalign_hash->{is_terminal_ternary} = $is_terminal_ternary;
- $rvalign_hash->{is_terminal_statement} = $is_semicolon_terminated;
- $rvalign_hash->{do_not_pad} = $do_not_pad;
- $rvalign_hash->{rvertical_tightness_flags} = $rvertical_tightness_flags;
- $rvalign_hash->{level_jump} = $level_jump;
- $rvalign_hash->{rfields} = $rfields;
- $rvalign_hash->{rpatterns} = $rpatterns;
- $rvalign_hash->{rtokens} = $rtokens;
- $rvalign_hash->{rfield_lengths} = $rfield_lengths;
- $rvalign_hash->{terminal_block_type} = $terminal_block_type;
- $rvalign_hash->{batch_count} = $batch_count;
+ # override breakpoint
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
- my $vao = $self->[_vertical_aligner_object_];
- $vao->valign_input($rvalign_hash);
+ # but otherwise ..
+ else {
- $in_comma_list = $type_end eq ',' && $forced_breakpoint;
+ # do not recombine after a comma unless this will leave
+ # just 1 more line
+ next unless ( $n + 1 >= $nmax );
- # flush an outdented line to avoid any unwanted vertical alignment
- $self->flush_vertical_aligner() if ($is_outdented_line);
+ # do not recombine if there is a change in indentation depth
+ next
+ if (
+ $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
- $do_not_pad = 0;
+ # do not recombine a "complex expression" after a
+ # comma. "complex" means no parens.
+ my $saw_paren;
+ foreach my $ii ( $ibeg_2 .. $iend_2 ) {
+ if ( $tokens_to_go[$ii] eq '(' ) {
+ $saw_paren = 1;
+ last;
+ }
+ }
+ next if $saw_paren;
+ }
+ }
- # Set flag indicating if this line ends in an opening
- # token and is very short, so that a blank line is not
- # needed if the subsequent line is a comment.
- # Examples of what we are looking for:
- # {
- # && (
- # BEGIN {
- # default {
- # sub {
- $self->[_last_output_short_opening_token_]
+ # opening paren..
+ elsif ( $type_iend_1 eq '(' ) {
- # line ends in opening token
- = $type_end =~ /^[\{\(\[L]$/
+ # No longer doing this
+ }
- # and either
- && (
- # line has either single opening token
- $Kend == $Kbeg
+ elsif ( $type_iend_1 eq ')' ) {
- # or is a single token followed by opening token.
- # Note that sub identifiers have blanks like 'sub doit'
- || ( $Kend - $Kbeg <= 2 && $token_beg !~ /\s+/ )
- )
+ # No longer doing this
+ }
- # and limit total to 10 character widths
- && token_sequence_length( $ibeg, $iend ) <= 10;
+ # keep a terminal for-semicolon
+ elsif ( $type_iend_1 eq 'f' ) {
+ next;
+ }
- } # end of loop to output each line
+ # if '=' at end of line ...
+ elsif ( $is_assignment{$type_iend_1} ) {
- # remember indentation of lines containing opening containers for
- # later use by sub set_adjusted_indentation
- $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
+ # keep break after = if it was in input stream
+ # this helps prevent 'blinkers'
+ next if $old_breakpoint_to_go[$iend_1]
- # output any new -cscw block comment
- if ($cscw_block_comment) {
- $self->flush_vertical_aligner();
- my $file_writer_object = $self->[_file_writer_object_];
- $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
- }
- return;
-}
+ # don't strand an isolated '='
+ && $iend_1 != $ibeg_1;
-{ ## begin closure make_alignment_patterns
+ my $is_short_quote =
+ ( $type_ibeg_2 eq 'Q'
+ && $ibeg_2 == $iend_2
+ && token_sequence_length( $ibeg_2, $ibeg_2 ) <
+ $rOpts_short_concatenation_item_length );
+ my $is_ternary =
+ ( $type_ibeg_1 eq '?'
+ && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
- my %block_type_map;
- my %keyword_map;
- my %operator_map;
+ # always join an isolated '=', a short quote, or if this
+ # will put ?/: at start of adjacent lines
+ if ( $ibeg_1 != $iend_1
+ && !$is_short_quote
+ && !$is_ternary )
+ {
+ next
+ unless (
+ (
- BEGIN {
+ # unless we can reduce this to two lines
+ $nmax < $n + 2
- # map related block names into a common name to
- # allow alignment
- %block_type_map = (
- 'unless' => 'if',
- 'else' => 'if',
- 'elsif' => 'if',
- 'when' => 'if',
- 'default' => 'if',
- 'case' => 'if',
- 'sort' => 'map',
- 'grep' => 'map',
- );
+ # or three lines, the last with a leading semicolon
+ || ( $nmax == $n + 2
+ && $types_to_go[$ibeg_nmax] eq ';' )
- # map certain keywords to the same 'if' class to align
- # long if/elsif sequences. [elsif.pl]
- %keyword_map = (
- 'unless' => 'if',
- 'else' => 'if',
- 'elsif' => 'if',
- 'when' => 'given',
- 'default' => 'given',
- 'case' => 'switch',
+ # or the next line ends with a here doc
+ || $type_iend_2 eq 'h'
- # treat an 'undef' similar to numbers and quotes
- 'undef' => 'Q',
- );
+ # or the next line ends in an open paren or brace
+ # and the break hasn't been forced [dima.t]
+ || ( !$forced_breakpoint_to_go[$iend_1]
+ && $type_iend_2 eq '{' )
+ )
- # map certain operators to the same class for pattern matching
- %operator_map = (
- '!~' => '=~',
- '+=' => '+=',
- '-=' => '+=',
- '*=' => '+=',
- '/=' => '+=',
- );
- }
+ # do not recombine if the two lines might align well
+ # this is a very approximate test for this
+ && (
- sub delete_needless_alignments {
- my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
+ # RT#127633 - the leading tokens are not operators
+ ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
- # Remove unwanted alignments. This routine is a place to remove
- # alignments which might cause problems at later stages. There are
- # currently two types of fixes:
+ # or they are different
+ || ( $ibeg_3 >= 0
+ && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
+ )
+ );
- # 1. Remove excess parens
- # 2. Remove alignments within 'elsif' conditions
+ if (
- # Patch #1: Excess alignment of parens can prevent other good
- # alignments. For example, note the parens in the first two rows of
- # the following snippet. They would normally get marked for alignment
- # and aligned as follows:
+ # Recombine if we can make two lines
+ $nmax >= $n + 2
- # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
- # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
- # my $img = new Gimp::Image( $w, $h, RGB );
+ # -lp users often prefer this:
+ # my $title = function($env, $env, $sysarea,
+ # "bubba Borrower Entry");
+ # so we will recombine if -lp is used we have
+ # ending comma
+ && ( !$rOpts_line_up_parentheses
+ || $type_iend_2 ne ',' )
+ )
+ {
- # This causes unnecessary paren alignment and prevents the third equals
- # from aligning. If we remove the unwanted alignments we get:
+ # otherwise, scan the rhs line up to last token for
+ # complexity. Note that we are not counting the last
+ # token in case it is an opening paren.
+ my $tv = 0;
+ my $depth = $nesting_depth_to_go[$ibeg_2];
+ foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
+ if ( $nesting_depth_to_go[$i] != $depth ) {
+ $tv++;
+ last if ( $tv > 1 );
+ }
+ $depth = $nesting_depth_to_go[$i];
+ }
- # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
- # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
- # my $img = new Gimp::Image( $w, $h, RGB );
+ # ok to recombine if no level changes before last token
+ if ( $tv > 0 ) {
- # A rule for doing this which works well is to remove alignment of
- # parens whose containers do not contain other aligning tokens, with
- # the exception that we always keep alignment of the first opening
- # paren on a line (for things like 'if' and 'elsif' statements).
+ # otherwise, do not recombine if more than two
+ # level changes.
+ next if ( $tv > 1 );
- # Setup needed constants
- my $i_good_paren = -1;
- my $imin_match = $iend + 1;
- my $i_elsif_close = $ibeg - 1;
- my $i_elsif_open = $iend + 1;
- if ( $iend > $ibeg ) {
- if ( $types_to_go[$ibeg] eq 'k' ) {
+ # check total complexity of the two adjacent lines
+ # that will occur if we do this join
+ my $istop =
+ ( $n < $nmax )
+ ? $ri_end->[ $n + 1 ]
+ : $iend_2;
+ foreach my $i ( $iend_2 .. $istop ) {
+ if ( $nesting_depth_to_go[$i] != $depth ) {
+ $tv++;
+ last if ( $tv > 2 );
+ }
+ $depth = $nesting_depth_to_go[$i];
+ }
- # Paren patch: mark a location of a paren we should keep, such
- # as one following something like a leading 'if', 'elsif',..
- $i_good_paren = $ibeg + 1;
- if ( $types_to_go[$i_good_paren] eq 'b' ) {
- $i_good_paren++;
+ # do not recombine if total is more than 2 level changes
+ next if ( $tv > 2 );
+ }
+ }
+ }
+
+ unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
}
- # 'elsif' patch: remember the range of the parens of an elsif,
- # and do not make alignments within them because this can cause
- # loss of padding and overall brace alignment in the vertical
- # aligner.
- if ( $tokens_to_go[$ibeg] eq 'elsif'
- && $i_good_paren < $iend
- && $tokens_to_go[$i_good_paren] eq '(' )
- {
- $i_elsif_open = $i_good_paren;
- $i_elsif_close = $self->mate_index_to_go($i_good_paren);
- }
- }
- }
+ # for keywords..
+ elsif ( $type_iend_1 eq 'k' ) {
- # Loop to make the fixes on this line
- my @imatch_list;
- for my $i ( $ibeg .. $iend ) {
+ # make major control keywords stand out
+ # (recombine.t)
+ next
+ if (
- if ( $ralignment_type_to_go->[$i] ) {
+ #/^(last|next|redo|return)$/
+ $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
- # Patch #2: undo alignment within elsif parens
- if ( $i > $i_elsif_open && $i < $i_elsif_close ) {
- $ralignment_type_to_go->[$i] = '';
- next;
+ # but only if followed by multiple lines
+ && $n < $nmax
+ );
+
+ if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
+ next
+ unless $want_break_before{ $tokens_to_go[$iend_1] };
+ }
}
- push @imatch_list, $i;
- }
- if ( $tokens_to_go[$i] eq ')' ) {
+ #----------------------------------------------------------
+ # Recombine Section 3:
+ # Examine token at $ibeg_2 (left end of second line of pair)
+ #----------------------------------------------------------
- # Patch #1: undo the corresponding opening paren if:
- # - it is at the top of the stack
- # - and not the first overall opening paren
- # - does not follow a leading keyword on this line
- my $imate = $self->mate_index_to_go($i);
- if ( @imatch_list
- && $imatch_list[-1] eq $imate
- && ( $ibeg > 1 || @imatch_list > 1 )
- && $imate > $i_good_paren )
- {
- $ralignment_type_to_go->[$imate] = '';
- pop @imatch_list;
+ # join lines identified above as capable of
+ # causing an outdented line with leading closing paren
+ # Note that we are skipping the rest of this section
+ # and the rest of the loop to do the join
+ if ($skip_Section_3) {
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ $n_best = $n;
+ last;
}
- }
- }
- return;
- }
-
- my $field_length_sum = sub {
- my ( $i1, $i2 ) = @_;
- my $len_field = 0;
- foreach ( $i1 .. $i2 ) {
- $len_field += $token_lengths_to_go[$_];
- }
- return $len_field;
- };
- sub make_alignment_patterns {
+ # handle lines with leading &&, ||
+ elsif ( $is_amp_amp{$type_ibeg_2} ) {
- # Here we do some important preliminary work for the
- # vertical aligner. We create three arrays for one
- # output line. These arrays contain strings that can
- # be tested by the vertical aligner to see if
- # consecutive lines can be aligned vertically.
- #
- # The three arrays are indexed on the vertical
- # alignment fields and are:
- # @tokens - a list of any vertical alignment tokens for this line.
- # These are tokens, such as '=' '&&' '#' etc which
- # we want to might align vertically. These are
- # decorated with various information such as
- # nesting depth to prevent unwanted vertical
- # alignment matches.
- # @fields - the actual text of the line between the vertical alignment
- # tokens.
- # @patterns - a modified list of token types, one for each alignment
- # field. These should normally each match before alignment is
- # allowed, even when the alignment tokens match.
- my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
- my @tokens = ();
- my @fields = ();
- my @patterns = ();
- my @field_lengths = ();
- my $i_start = $ibeg;
+ $leading_amp_count++;
- my $depth = 0;
- my %container_name = ( 0 => "" );
+ # ok to recombine if it follows a ? or :
+ # and is followed by an open paren..
+ my $ok =
+ ( $is_ternary{$type_ibeg_1}
+ && $tokens_to_go[$iend_2] eq '(' )
- my $j = 0; # field index
+ # or is followed by a ? or : at same depth
+ #
+ # We are looking for something like this. We can
+ # recombine the && line with the line above to make the
+ # structure more clear:
+ # return
+ # exists $G->{Attr}->{V}
+ # && exists $G->{Attr}->{V}->{$u}
+ # ? %{ $G->{Attr}->{V}->{$u} }
+ # : ();
+ #
+ # We should probably leave something like this alone:
+ # return
+ # exists $G->{Attr}->{E}
+ # && exists $G->{Attr}->{E}->{$u}
+ # && exists $G->{Attr}->{E}->{$u}->{$v}
+ # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
+ # : ();
+ # so that we either have all of the &&'s (or ||'s)
+ # on one line, as in the first example, or break at
+ # each one as in the second example. However, it
+ # sometimes makes things worse to check for this because
+ # it prevents multiple recombinations. So this is not done.
+ || ( $ibeg_3 >= 0
+ && $is_ternary{ $types_to_go[$ibeg_3] }
+ && $nesting_depth_to_go[$ibeg_3] ==
+ $nesting_depth_to_go[$ibeg_2] );
- $patterns[0] = "";
- my %token_count;
- for my $i ( $ibeg .. $iend ) {
+ next if !$ok && $want_break_before{$type_ibeg_2};
+ $forced_breakpoint_to_go[$iend_1] = 0;
- # Keep track of containers balanced on this line only.
- # These are used below to prevent unwanted cross-line alignments.
- # Unbalanced containers already avoid aligning across
- # container boundaries.
- my $tok = $tokens_to_go[$i];
- my $depth_last = $depth;
- if ( $tok =~ /^[\(\{\[]/ ) { #'(' ) {
+ # tweak the bond strength to give this joint priority
+ # over ? and :
+ $bs_tweak = 0.25;
+ }
- # if container is balanced on this line...
- my $i_mate = $self->mate_index_to_go($i);
- if ( $i_mate > $i && $i_mate <= $iend ) {
- $depth++;
+ # Identify and recombine a broken ?/: chain
+ elsif ( $type_ibeg_2 eq '?' ) {
- # Append the previous token name to make the container name
- # more unique. This name will also be given to any commas
- # within this container, and it helps avoid undesirable
- # alignments of different types of containers.
+ # Do not recombine different levels
+ my $lev = $levels_to_go[$ibeg_2];
+ next if ( $lev ne $levels_to_go[$ibeg_1] );
- # Containers beginning with { and [ are given those names
- # for uniqueness. That way commas in different containers
- # will not match. Here is an example of what this prevents:
- # a => [ 1, 2, 3 ],
- # b => { b1 => 4, b2 => 5 },
- # Here is another example of what we avoid by labeling the
- # commas properly:
+ # Do not recombine a '?' if either next line or
+ # previous line does not start with a ':'. The reasons
+ # are that (1) no alignment of the ? will be possible
+ # and (2) the expression is somewhat complex, so the
+ # '?' is harder to see in the interior of the line.
+ my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
+ my $precedes_colon =
+ $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
+ next unless ( $follows_colon || $precedes_colon );
- # is_d( [ $a, $a ], [ $b, $c ] );
- # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
- # is_d( [ \$a, \$a ], [ \$b, \$c ] );
+ # we will always combining a ? line following a : line
+ if ( !$follows_colon ) {
- my $name = $tok;
- if ( $tok eq '(' ) {
- $name = $self->make_paren_name($i);
+ # ...otherwise recombine only if it looks like a chain.
+ # we will just look at a few nearby lines to see if
+ # this looks like a chain.
+ my $local_count = 0;
+ foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
+ $local_count++
+ if $ii >= 0
+ && $types_to_go[$ii] eq ':'
+ && $levels_to_go[$ii] == $lev;
+ }
+ next unless ( $local_count > 1 );
}
- $container_name{$depth} = "+" . $name;
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
- # Make the container name even more unique if necessary.
- # If we are not vertically aligning this opening paren,
- # append a character count to avoid bad alignment because
- # it usually looks bad to align commas within containers
- # for which the opening parens do not align. Here
- # is an example very BAD alignment of commas (because
- # the atan2 functions are not all aligned):
- # $XY =
- # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
- # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
- # $X * atan2( $X, 1 ) -
- # $Y * atan2( $Y, 1 );
- #
- # On the other hand, it is usually okay to align commas if
- # opening parens align, such as:
- # glVertex3d( $cx + $s * $xs, $cy, $z );
- # glVertex3d( $cx, $cy + $s * $ys, $z );
- # glVertex3d( $cx - $s * $xs, $cy, $z );
- # glVertex3d( $cx, $cy - $s * $ys, $z );
- #
- # To distinguish between these situations, we will
- # append the length of the line from the previous matching
- # token, or beginning of line, to the function name. This
- # will allow the vertical aligner to reject undesirable
- # matches.
+ # do not recombine lines with leading '.'
+ elsif ( $type_ibeg_2 eq '.' ) {
+ my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
+ next
+ unless (
- # if we are not aligning on this paren...
- if ( !$ralignment_type_to_go->[$i] ) {
+ # ... unless there is just one and we can reduce
+ # this to two lines if we do. For example, this
+ #
+ #
+ # $bodyA .=
+ # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
+ #
+ # looks better than this:
+ # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
+ # . '$args .= $pat;'
- # Sum length from previous alignment
- my $len = token_sequence_length( $i_start, $i - 1 );
+ (
+ $n == 2
+ && $n == $nmax
+ && $type_ibeg_1 ne $type_ibeg_2
+ )
- # Minor patch: do not include the length of any '!'.
- # Otherwise, commas in the following line will not
- # match
- # ok( 20, tapprox( ( pdl 2, 3 ), ( pdl 2, 3 ) ) );
- # ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) );
- if ( grep { $_ eq '!' }
- @types_to_go[ $i_start .. $i - 1 ] )
- {
- $len -= 1;
- }
-
- if ( $i_start == $ibeg ) {
-
- # For first token, use distance from start of line
- # but subtract off the indentation due to level.
- # Otherwise, results could vary with indentation.
- $len += leading_spaces_to_go($ibeg) -
- $levels_to_go[$i_start] * $rOpts_indent_columns;
- if ( $len < 0 ) { $len = 0 }
- }
+ # ... or this would strand a short quote , like this
+ # . "some long quote"
+ # . "\n";
- # tack this length onto the container name to try
- # to make a unique token name
- $container_name{$depth} .= "-" . $len;
- }
+ || ( $types_to_go[$i_next_nonblank] eq 'Q'
+ && $i_next_nonblank >= $iend_2 - 1
+ && $token_lengths_to_go[$i_next_nonblank] <
+ $rOpts_short_concatenation_item_length )
+ );
}
- }
- elsif ( $tokens_to_go[$i] =~ /^[\)\}\]]/ ) {
- $depth-- if $depth > 0;
- }
- # if we find a new synchronization token, we are done with
- # a field
- if ( $i > $i_start && $ralignment_type_to_go->[$i] ) {
-
- my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
+ # handle leading keyword..
+ elsif ( $type_ibeg_2 eq 'k' ) {
- # map similar items
- my $tok_map = $operator_map{$tok};
- $tok = $tok_map if ($tok_map);
+ # handle leading "or"
+ if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
+ next
+ unless (
+ $this_line_is_semicolon_terminated
+ && (
+ $type_ibeg_1 eq '}'
+ || (
- # make separators in different nesting depths unique
- # by appending the nesting depth digit.
- if ( $raw_tok ne '#' ) {
- $tok .= "$nesting_depth_to_go[$i]";
- }
+ # following 'if' or 'unless' or 'or'
+ $type_ibeg_1 eq 'k'
+ && $is_if_unless{ $tokens_to_go[$ibeg_1] }
- # also decorate commas with any container name to avoid
- # unwanted cross-line alignments.
- if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
+ # important: only combine a very simple or
+ # statement because the step below may have
+ # combined a trailing 'and' with this or,
+ # and we do not want to then combine
+ # everything together
+ && ( $iend_2 - $ibeg_2 <= 7 )
+ )
+ )
+ );
- # If we are at an opening token which increased depth, we have
- # to use the name from the previous depth.
- my $depth_p =
- ( $depth_last < $depth ? $depth_last : $depth );
- if ( $container_name{$depth_p} ) {
- $tok .= $container_name{$depth_p};
+ #X: RT #81854
+ $forced_breakpoint_to_go[$iend_1] = 0
+ unless $old_breakpoint_to_go[$iend_1];
}
- }
- # Patch to avoid aligning leading and trailing if, unless.
- # Mark trailing if, unless statements with container names.
- # This makes them different from leading if, unless which
- # are not so marked at present. If we ever need to name
- # them too, we could use ci to distinguish them.
- # Example problem to avoid:
- # return ( 2, "DBERROR" )
- # if ( $retval == 2 );
- # if ( scalar @_ ) {
- # my ( $a, $b, $c, $d, $e, $f ) = @_;
- # }
- if ( $raw_tok eq '(' ) {
- my $ci = $ci_levels_to_go[$ibeg];
- if ( $container_name{$depth} =~ /^\+(if|unless)/
- && $ci )
+ # handle leading 'and' and 'xor'
+ elsif ($tokens_to_go[$ibeg_2] eq 'and'
+ || $tokens_to_go[$ibeg_2] eq 'xor' )
{
- $tok .= $container_name{$depth};
- }
- }
-
- # Decorate block braces with block types to avoid
- # unwanted alignments such as the following:
- # foreach ( @{$routput_array} ) { $fh->print($_) }
- # eval { $fh->close() };
- if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
- my $block_type = $block_type_to_go[$i];
- # map certain related block types to allow
- # else blocks to align
- $block_type = $block_type_map{$block_type}
- if ( defined( $block_type_map{$block_type} ) );
+ # Decide if we will combine a single terminal 'and'
+ # after an 'if' or 'unless'.
- # remove sub names to allow one-line sub braces to align
- # regardless of name
- #if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
- if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
+ # This looks best with the 'and' on the same
+ # line as the 'if':
+ #
+ # $a = 1
+ # if $seconds and $nu < 2;
+ #
+ # But this looks better as shown:
+ #
+ # $a = 1
+ # if !$this->{Parents}{$_}
+ # or $this->{Parents}{$_} eq $_;
+ #
+ next
+ unless (
+ $this_line_is_semicolon_terminated
+ && (
- # allow all control-type blocks to align
- if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
+ # following 'if' or 'unless' or 'or'
+ $type_ibeg_1 eq 'k'
+ && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
+ || $tokens_to_go[$ibeg_1] eq 'or' )
+ )
+ );
+ }
- $tok .= $block_type;
- }
+ # handle leading "if" and "unless"
+ elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
- # Mark multiple copies of certain tokens with the copy number
- # This will allow the aligner to decide if they are matched.
- # For now, only do this for equals. For example, the two
- # equals on the next line will be labeled '=0' and '=0.2'.
- # Later, the '=0.2' will be ignored in alignment because it
- # has no match.
+ # FIXME: This is still experimental..may not be too useful
+ next
+ unless (
+ $this_line_is_semicolon_terminated
- # $| = $debug = 1 if $opt_d;
- # $full_index = 1 if $opt_i;
+ # previous line begins with 'and' or 'or'
+ && $type_ibeg_1 eq 'k'
+ && $is_and_or{ $tokens_to_go[$ibeg_1] }
- if ( $raw_tok eq '=' || $raw_tok eq '=>' ) {
- $token_count{$tok}++;
- if ( $token_count{$tok} > 1 ) {
- $tok .= '.' . $token_count{$tok};
+ );
}
- }
- # concatenate the text of the consecutive tokens to form
- # the field
- push( @fields,
- join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
-
- push @field_lengths, $field_length_sum->( $i_start, $i - 1 );
+ # handle all other leading keywords
+ else {
- # store the alignment token for this field
- push( @tokens, $tok );
+ # keywords look best at start of lines,
+ # but combine things like "1 while"
+ unless ( $is_assignment{$type_iend_1} ) {
+ next
+ if ( ( $type_iend_1 ne 'k' )
+ && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
+ }
+ }
+ }
- # get ready for the next batch
- $i_start = $i;
- $j++;
- $patterns[$j] = "";
- }
+ # similar treatment of && and || as above for 'and' and 'or':
+ # NOTE: This block of code is currently bypassed because
+ # of a previous block but is retained for possible future use.
+ elsif ( $is_amp_amp{$type_ibeg_2} ) {
- # continue accumulating tokens
- # handle non-keywords..
- if ( $types_to_go[$i] ne 'k' ) {
- my $type = $types_to_go[$i];
+ # maybe looking at something like:
+ # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
- # Mark most things before arrows as a quote to
- # get them to line up. Testfile: mixed.pl.
- if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
- my $next_type = $types_to_go[ $i + 1 ];
- my $i_next_nonblank =
- ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
+ next
+ unless (
+ $this_line_is_semicolon_terminated
- if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
- $type = 'Q';
+ # previous line begins with an 'if' or 'unless' keyword
+ && $type_ibeg_1 eq 'k'
+ && $is_if_unless{ $tokens_to_go[$ibeg_1] }
- # Patch to ignore leading minus before words,
- # by changing pattern 'mQ' into just 'Q',
- # so that we can align things like this:
- # Button => "Print letter \"~$_\"",
- # -command => [ sub { print "$_[0]\n" }, $_ ],
- if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
- }
+ );
}
- # Convert a bareword within braces into a quote for matching.
- # This will allow alignment of expressions like this:
- # local ( $SIG{'INT'} ) = IGNORE;
- # local ( $SIG{ALRM} ) = 'POSTMAN';
- if ( $type eq 'w'
- && $i > $ibeg
- && $i < $iend
- && $types_to_go[ $i - 1 ] eq 'L'
- && $types_to_go[ $i + 1 ] eq 'R' )
- {
- $type = 'Q';
- }
+ # handle line with leading = or similar
+ elsif ( $is_assignment{$type_ibeg_2} ) {
+ next unless ( $n == 1 || $n == $nmax );
+ next if $old_breakpoint_to_go[$iend_1];
+ next
+ unless (
- # patch to make numbers and quotes align
- if ( $type eq 'n' ) { $type = 'Q' }
+ # unless we can reduce this to two lines
+ $nmax == 2
- # patch to ignore any ! in patterns
- if ( $type eq '!' ) { $type = '' }
+ # or three lines, the last with a leading semicolon
+ || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
- $patterns[$j] .= $type;
- }
+ # or the next line ends with a here doc
+ || $type_iend_2 eq 'h'
- # for keywords we have to use the actual text
- else {
-
- my $tok = $tokens_to_go[$i];
-
- # but map certain keywords to a common string to allow
- # alignment.
- $tok = $keyword_map{$tok}
- if ( defined( $keyword_map{$tok} ) );
- $patterns[$j] .= $tok;
- }
- }
-
- # done with this line .. join text of tokens to make the last field
- push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
- push @field_lengths, $field_length_sum->( $i_start, $iend );
- return ( \@tokens, \@fields, \@patterns, \@field_lengths );
- }
-
-} ## end closure make_alignment_patterns
-
-{ ## begin closure match_opening_and_closing_tokens
-
- # closure to keep track of unbalanced containers.
- # arrays shared by the routines in this block:
- my %saved_opening_indentation;
- my @unmatched_opening_indexes_in_this_batch;
- my @unmatched_closing_indexes_in_this_batch;
- my %comma_arrow_count;
+ # or this is a short line ending in ;
+ || ( $n == $nmax && $this_line_is_semicolon_terminated )
+ );
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
- sub initialize_saved_opening_indentation {
- %saved_opening_indentation = ();
- return;
- }
+ #----------------------------------------------------------
+ # Recombine Section 4:
+ # Combine the lines if we arrive here and it is possible
+ #----------------------------------------------------------
- sub is_unbalanced_batch {
- return @unmatched_opening_indexes_in_this_batch +
- @unmatched_closing_indexes_in_this_batch;
- }
+ # honor hard breakpoints
+ next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
- sub comma_arrow_count {
- my $seqno = shift;
- return $comma_arrow_count{$seqno};
- }
+ my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
- sub match_opening_and_closing_tokens {
+ # Require a few extra spaces before recombining lines if we are
+ # at an old breakpoint unless this is a simple list or terminal
+ # line. The goal is to avoid oscillating between two
+ # quasi-stable end states. For example this snippet caused
+ # problems:
+## my $this =
+## bless {
+## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
+## },
+## $type;
+ next
+ if ( $old_breakpoint_to_go[$iend_1]
+ && !$this_line_is_semicolon_terminated
+ && $n < $nmax
+ && $excess + 4 > 0
+ && $type_iend_2 ne ',' );
- # Match up indexes of opening and closing braces, etc, in this batch.
- # This has to be done after all tokens are stored because unstoring
- # of tokens would otherwise cause trouble.
+ # do not recombine if we would skip in indentation levels
+ if ( $n < $nmax ) {
+ my $if_next = $ri_beg->[ $n + 1 ];
+ next
+ if (
+ $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
+ && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
- my ($self) = @_;
+ # but an isolated 'if (' is undesirable
+ && !(
+ $n == 1
+ && $iend_1 - $ibeg_1 <= 2
+ && $type_ibeg_1 eq 'k'
+ && $tokens_to_go[$ibeg_1] eq 'if'
+ && $tokens_to_go[$iend_1] ne '('
+ )
+ );
+ }
- @unmatched_opening_indexes_in_this_batch = ();
- @unmatched_closing_indexes_in_this_batch = ();
- %comma_arrow_count = ();
- my $comma_arrow_count_contained = 0;
+ # honor no-break's
+ next if ( $bs >= NO_BREAK - 1 );
- foreach my $i ( 0 .. $max_index_to_go ) {
- if ( $type_sequence_to_go[$i] ) {
- my $token = $tokens_to_go[$i];
- if ( $token =~ /^[\(\[\{\?]$/ ) {
- push @unmatched_opening_indexes_in_this_batch, $i;
+ # remember the pair with the greatest bond strength
+ if ( !$n_best ) {
+ $n_best = $n;
+ $bs_best = $bs;
}
- elsif ( $token =~ /^[\)\]\}\:]$/ ) {
+ else {
- my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
- if ( defined($i_mate) && $i_mate >= 0 ) {
- if ( $type_sequence_to_go[$i_mate] ==
- $type_sequence_to_go[$i] )
- {
- $mate_index_to_go[$i] = $i_mate;
- $mate_index_to_go[$i_mate] = $i;
- my $seqno = $type_sequence_to_go[$i];
- if ( $comma_arrow_count{$seqno} ) {
- $comma_arrow_count_contained +=
- $comma_arrow_count{$seqno};
- }
- }
- else {
- push @unmatched_opening_indexes_in_this_batch,
- $i_mate;
- push @unmatched_closing_indexes_in_this_batch, $i;
- }
- }
- else {
- push @unmatched_closing_indexes_in_this_batch, $i;
+ if ( $bs > $bs_best ) {
+ $n_best = $n;
+ $bs_best = $bs;
}
}
}
- elsif ( $tokens_to_go[$i] eq '=>' ) {
- if (@unmatched_opening_indexes_in_this_batch) {
- my $j = $unmatched_opening_indexes_in_this_batch[-1];
- my $seqno = $type_sequence_to_go[$j];
- $comma_arrow_count{$seqno}++;
- }
+
+ # recombine the pair with the greatest bond strength
+ if ($n_best) {
+ splice @{$ri_beg}, $n_best, 1;
+ splice @{$ri_end}, $n_best - 1, 1;
+ splice @joint, $n_best, 1;
+
+ # keep going if we are still making progress
+ $more_to_do++;
}
}
- return $comma_arrow_count_contained;
+ return ( $ri_beg, $ri_end );
}
+} ## end closure recombine_breakpoints
- sub save_opening_indentation {
+sub insert_final_ternary_breaks {
- # This should be called after each batch of tokens is output. It
- # saves indentations of lines of all unmatched opening tokens.
- # These will be used by sub get_opening_indentation.
+ my ( $self, $ri_left, $ri_right ) = @_;
- my ( $self, $ri_first, $ri_last, $rindentation_list ) = @_;
+ # Called once per batch to look for and do any final line breaks for
+ # long ternary chains
- # we no longer need indentations of any saved indentations which
- # are unmatched closing tokens in this batch, because we will
- # never encounter them again. So we can delete them to keep
- # the hash size down.
- foreach (@unmatched_closing_indexes_in_this_batch) {
- my $seqno = $type_sequence_to_go[$_];
- delete $saved_opening_indentation{$seqno};
- }
+ my $nmax = @{$ri_right} - 1;
- # we need to save indentations of any unmatched opening tokens
- # in this batch because we may need them in a subsequent batch.
- foreach (@unmatched_opening_indexes_in_this_batch) {
- my $seqno = $type_sequence_to_go[$_];
- $saved_opening_indentation{$seqno} = [
- lookup_opening_indentation(
- $_, $ri_first, $ri_last, $rindentation_list
- )
- ];
- }
- return;
+ # scan the left and right end tokens of all lines
+ my $count = 0;
+ my $i_first_colon = -1;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ my $typel = $types_to_go[$il];
+ my $typer = $types_to_go[$ir];
+ return if ( $typel eq '?' );
+ return if ( $typer eq '?' );
+ if ( $typel eq ':' ) { $i_first_colon = $il; last; }
+ elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
}
- sub get_saved_opening_indentation {
- my ($seqno) = @_;
- my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 );
+ # For long ternary chains,
+ # if the first : we see has its ? is in the interior
+ # of a preceding line, then see if there are any good
+ # breakpoints before the ?.
+ if ( $i_first_colon > 0 ) {
+ my $i_question = $mate_index_to_go[$i_first_colon];
+ if ( $i_question > 0 ) {
+ my @insert_list;
+ for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
+ my $token = $tokens_to_go[$ii];
+ my $type = $types_to_go[$ii];
- if ($seqno) {
- if ( $saved_opening_indentation{$seqno} ) {
- ( $indent, $offset, $is_leading ) =
- @{ $saved_opening_indentation{$seqno} };
- $exists = 1;
+ # For now, a good break is either a comma or,
+ # in a long chain, a 'return'.
+ # Patch for RT #126633: added the $nmax>1 check to avoid
+ # breaking after a return for a simple ternary. For longer
+ # chains the break after return allows vertical alignment, so
+ # it is still done. So perltidy -wba='?' will not break
+ # immediately after the return in the following statement:
+ # sub x {
+ # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
+ # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
+ # }
+ if (
+ (
+ $type eq ','
+ || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
+ )
+ && $self->in_same_container_i( $ii, $i_question )
+ )
+ {
+ push @insert_list, $ii;
+ last;
+ }
+ }
+
+ # insert any new break points
+ if (@insert_list) {
+ $self->insert_additional_breaks( \@insert_list, $ri_left,
+ $ri_right );
}
}
+ }
+ return;
+}
- # some kind of serious error it doesn't exist
- # (example is badfile.t)
+sub insert_breaks_before_list_opening_containers {
- return ( $indent, $offset, $is_leading, $exists );
- }
-} ## end closure match_opening_and_closing_tokens
+ my ( $self, $ri_left, $ri_right ) = @_;
-sub get_opening_indentation {
+ # This routine is called once per batch to implement the parameters
+ # --break-before-hash-brace, etc.
- # get the indentation of the line which output the opening token
- # corresponding to a given closing token in the current output batch.
- #
- # given:
- # $i_closing - index in this line of a closing token ')' '}' or ']'
- #
- # $ri_first - reference to list of the first index $i for each output
- # line in this batch
- # $ri_last - reference to list of the last index $i for each output line
- # in this batch
- # $rindentation_list - reference to a list containing the indentation
- # used for each line.
- #
- # return:
- # -the indentation of the line which contained the opening token
- # which matches the token at index $i_opening
- # -and its offset (number of columns) from the start of the line
- #
- my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
-
- # first, see if the opening token is in the current batch
- my $i_opening = $mate_index_to_go[$i_closing];
- my ( $indent, $offset, $is_leading, $exists );
- $exists = 1;
- if ( $i_opening >= 0 ) {
+ # Nothing to do if none of these parameters has been set
+ return unless %break_before_container_types;
- # it is..look up the indentation
- ( $indent, $offset, $is_leading ) =
- lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
- $rindentation_list );
- }
+ my $nmax = @{$ri_right} - 1;
+ return unless ( $nmax >= 0 );
- # if not, it should have been stored in the hash by a previous batch
- else {
- ( $indent, $offset, $is_leading, $exists ) =
- get_saved_opening_indentation( $type_sequence_to_go[$i_closing] );
- }
- return ( $indent, $offset, $is_leading, $exists );
-}
+ my $rLL = $self->[_rLL_];
+ my $ris_broken_container = $self->[_ris_broken_container_];
+ my $rhas_broken_container = $self->[_rhas_broken_container_];
+ my $rparent_of_seqno = $self->[_rparent_of_seqno_];
-sub lookup_opening_indentation {
+ # scan the ends of all lines
+ my @insert_list;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ next unless ( $ir > $il );
+ my $Kl = $K_to_go[$il];
+ my $Kr = $K_to_go[$ir];
+ my $Kend = $Kr;
+ my $iend = $ir;
+ my $type_end = $rLL->[$Kr]->[_TYPE_];
- # get the indentation of the line in the current output batch
- # which output a selected opening token
- #
- # given:
- # $i_opening - index of an opening token in the current output batch
- # whose line indentation we need
- # $ri_first - reference to list of the first index $i for each output
- # line in this batch
- # $ri_last - reference to list of the last index $i for each output line
- # in this batch
- # $rindentation_list - reference to a list containing the indentation
- # used for each line. (NOTE: the first slot in
- # this list is the last returned line number, and this is
- # followed by the list of indentations).
- #
- # return
- # -the indentation of the line which contained token $i_opening
- # -and its offset (number of columns) from the start of the line
+ # Backup before any side comment
+ if ( $type_end eq '#' ) {
+ $Kend = $self->K_previous_nonblank($Kr);
+ next unless defined($Kend);
+ $type_end = $rLL->[$Kend]->[_TYPE_];
+ $iend = $ir + ( $Kend - $Kr );
+ }
- my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
+ next unless ( $Kl < $Kend - 1 );
- if ( !@{$ri_last} ) {
- warning("Error in opening_indentation: no lines");
- return;
- }
+ my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_];
+ next unless ( defined($seqno) );
- my $nline = $rindentation_list->[0]; # line number of previous lookup
+ # Only for types of container tokens with a non-default break option
+ my $token_end = $rLL->[$Kend]->[_TOKEN_];
+ my $break_option = $break_before_container_types{$token_end};
+ next unless ($break_option);
- # reset line location if necessary
- $nline = 0 if ( $i_opening < $ri_start->[$nline] );
+ # Require previous nonblank to be certain types (= and =>)
+ # Note similar coding in sub adjust_container_indentation
+ my $Kprev = $Kend - 1;
+ my $prev_type = $rLL->[$Kprev]->[_TYPE_];
+ if ( $prev_type eq 'b' ) {
+ $Kprev--;
+ next if ( $Kprev <= $Kl );
+ $prev_type = $rLL->[$Kprev]->[_TYPE_];
+ }
+ next unless ( $is_equal_or_fat_comma{$prev_type} );
- # find the correct line
- unless ( $i_opening > $ri_last->[-1] ) {
- while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
- }
+ # This must be a list (this will exclude all code blocks)
+ next unless $self->is_list($seqno);
- # error - token index is out of bounds - shouldn't happen
- else {
- warning(
-"non-fatal program bug in lookup_opening_indentation - index out of range\n"
- );
- report_definite_bug();
- $nline = $#{$ri_last};
- }
+ # Never break a weld
+ next if ( $self->weld_len_left( $seqno, $token_end ) );
- $rindentation_list->[0] =
- $nline; # save line number to start looking next call
- my $ibeg = $ri_start->[$nline];
- my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
- my $is_leading = ( $ibeg == $i_opening );
- return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
-}
+ # Final decision is based on selected option:
-{ ## begin closure set_adjusted_indentation
+ # Option 1 = stable, try to follow input
+ my $ok_to_break;
+ if ( $break_option == 1 ) {
+ if ( $ir - 2 > $il ) {
+ $ok_to_break = $old_breakpoint_to_go[ $ir - 2 ];
+ }
+ }
- my ( $last_indentation_written, $last_unadjusted_indentation,
- $last_leading_token );
+ # Option 2 = only if complex list, meaning:
+ # - this list contains a broken container, or
+ # - this list is contained in a broken list
+ elsif ( $break_option == 2 ) {
+ $ok_to_break = $rhas_broken_container->{$seqno};
+ if ( !$ok_to_break ) {
+ my $parent = $rparent_of_seqno->{$seqno};
+ $ok_to_break = $self->is_list($parent);
+ }
+ }
- sub initialize_adjusted_indentation {
- $last_indentation_written = 0;
- $last_unadjusted_indentation = 0;
- $last_leading_token = "";
- return;
- }
+ # Option 3 = always break
+ elsif ( $break_option == 3 ) {
+ $ok_to_break = 1;
+ }
- sub set_adjusted_indentation {
+ # Shouldn't happen! Bad flag, but make behavior same as 3
+ else {
+ $ok_to_break = 1;
+ }
- # This routine has the final say regarding the actual indentation of
- # a line. It starts with the basic indentation which has been
- # defined for the leading token, and then takes into account any
- # options that the user has set regarding special indenting and
- # outdenting.
+ next unless ($ok_to_break);
- my (
- $self, $ibeg,
- $iend, $rfields,
- $rpatterns, $ri_first,
- $ri_last, $rindentation_list,
- $level_jump, $starting_in_quote,
- $is_static_block_comment,
- ) = @_;
+ # This meets the criteria, so install a break before the opening token.
+ my $Kbreak = $self->K_previous_nonblank($Kend);
+ my $ibreak = $Kbreak - $Kl + $il;
+ next if ( $ibreak < $il );
+ next if ( $nobreak_to_go[$ibreak] );
+ push @insert_list, $ibreak;
- my $rLL = $self->[_rLL_];
+ }
- # we need to know the last token of this line
- my ( $terminal_type, $i_terminal ) =
- $self->terminal_type_i( $ibeg, $iend );
+ # insert any new break points
+ if (@insert_list) {
+ $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ }
+ return;
+}
- my $terminal_block_type = $block_type_to_go[$i_terminal];
- my $is_outdented_line = 0;
+sub note_added_semicolon {
+ my ( $self, $line_number ) = @_;
+ $self->[_last_added_semicolon_at_] = $line_number;
+ if ( $self->[_added_semicolon_count_] == 0 ) {
+ $self->[_first_added_semicolon_at_] = $line_number;
+ }
+ $self->[_added_semicolon_count_]++;
+ write_logfile_entry("Added ';' here\n");
+ return;
+}
- my $is_semicolon_terminated = $terminal_type eq ';'
- && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
+sub note_deleted_semicolon {
+ my ( $self, $line_number ) = @_;
+ $self->[_last_deleted_semicolon_at_] = $line_number;
+ if ( $self->[_deleted_semicolon_count_] == 0 ) {
+ $self->[_first_deleted_semicolon_at_] = $line_number;
+ }
+ $self->[_deleted_semicolon_count_]++;
+ write_logfile_entry("Deleted unnecessary ';' at line $line_number\n");
+ return;
+}
- # NOTE: A future improvement would be to make it semicolon terminated
- # even if it does not have a semicolon but is followed by a closing
- # block brace. This would undo ci even for something like the
- # following, in which the final paren does not have a semicolon because
- # it is a possible weld location:
+sub note_embedded_tab {
+ my ( $self, $line_number ) = @_;
+ $self->[_embedded_tab_count_]++;
+ $self->[_last_embedded_tab_at_] = $line_number;
+ if ( !$self->[_first_embedded_tab_at_] ) {
+ $self->[_first_embedded_tab_at_] = $line_number;
+ }
- # if ($BOLD_MATH) {
- # (
- # $labels, $comment,
- # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
- # )
- # }
- #
+ if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) {
+ write_logfile_entry("Embedded tabs in quote or pattern\n");
+ }
+ return;
+}
- # MOJO: Set a flag if this lines begins with ')->'
- my $leading_paren_arrow = (
- $types_to_go[$ibeg] eq '}'
- && $tokens_to_go[$ibeg] eq ')'
- && (
- ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
- || ( $ibeg < $i_terminal - 1
- && $types_to_go[ $ibeg + 1 ] eq 'b'
- && $types_to_go[ $ibeg + 2 ] eq '->' )
- )
- );
+sub correct_lp_indentation {
- ##########################################################
- # Section 1: set a flag and a default indentation
- #
- # Most lines are indented according to the initial token.
- # But it is common to outdent to the level just after the
- # terminal token in certain cases...
- # adjust_indentation flag:
- # 0 - do not adjust
- # 1 - outdent
- # 2 - vertically align with opening token
- # 3 - indent
- ##########################################################
- my $adjust_indentation = 0;
- my $default_adjust_indentation = $adjust_indentation;
+ # When the -lp option is used, we need to make a last pass through
+ # each line to correct the indentation positions in case they differ
+ # from the predictions. This is necessary because perltidy uses a
+ # predictor/corrector method for aligning with opening parens. The
+ # predictor is usually good, but sometimes stumbles. The corrector
+ # tries to patch things up once the actual opening paren locations
+ # are known.
+ my ( $self, $ri_first, $ri_last ) = @_;
+ my $do_not_pad = 0;
- my (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- );
+ # Note on flag '$do_not_pad':
+ # We want to avoid a situation like this, where the aligner inserts
+ # whitespace before the '=' to align it with a previous '=', because
+ # otherwise the parens might become mis-aligned in a situation like
+ # this, where the '=' has become aligned with the previous line,
+ # pushing the opening '(' forward beyond where we want it.
+ #
+ # $mkFloor::currentRoom = '';
+ # $mkFloor::c_entry = $c->Entry(
+ # -width => '10',
+ # -relief => 'sunken',
+ # ...
+ # );
+ #
+ # We leave it to the aligner to decide how to do this.
- my $type_beg = $types_to_go[$ibeg];
- my $token_beg = $tokens_to_go[$ibeg];
- my $K_beg = $K_to_go[$ibeg];
- my $ibeg_weld_fix = $ibeg;
+ # first remove continuation indentation if appropriate
+ my $max_line = @{$ri_first} - 1;
- # QW PATCH 2 (Testing)
- # At an isolated closing token of a qw quote which is welded to
- # a following closing token, we will locally change its type to
- # be the same as its token. This will allow formatting to be the
- # same as for an ordinary closing token.
+ # looking at each line of this batch..
+ my ( $ibeg, $iend );
+ foreach my $line ( 0 .. $max_line ) {
+ $ibeg = $ri_first->[$line];
+ $iend = $ri_last->[$line];
- # For -lp formatting se use $ibeg_weld_fix to get around the problem
- # that with -lp type formatting the opening and closing tokens to not
- # have sequence numbers.
- if ( $type_beg eq 'q' && $token_beg =~ /^[\)\}\]\>]/ ) {
- my $K_next_nonblank = $self->K_next_code($K_beg);
- if ( defined($K_next_nonblank) ) {
- my $type_sequence = $rLL->[$K_next_nonblank]->[_TYPE_SEQUENCE_];
- my $token = $rLL->[$K_next_nonblank]->[_TOKEN_];
- my $welded = $self->weld_len_left( $type_sequence, $token );
- if ($welded) {
- my $itest = $ibeg + ( $K_next_nonblank - $K_beg );
- if ( $itest <= $max_index_to_go ) {
- $ibeg_weld_fix = $itest;
+ # looking at each token in this output line..
+ foreach my $i ( $ibeg .. $iend ) {
+
+ # How many space characters to place before this token
+ # for special alignment. Actual padding is done in the
+ # continue block.
+
+ # looking for next unvisited indentation item
+ my $indentation = $leading_spaces_to_go[$i];
+ if ( !$indentation->get_marked() ) {
+ $indentation->set_marked(1);
+
+ # looking for indentation item for which we are aligning
+ # with parens, braces, and brackets
+ next unless ( $indentation->get_align_paren() );
+
+ # skip closed container on this line
+ if ( $i > $ibeg ) {
+ my $im = max( $ibeg, $iprev_to_go[$i] );
+ if ( $type_sequence_to_go[$im]
+ && $mate_index_to_go[$im] <= $iend )
+ {
+ next;
}
- $type_beg = ')'; ##$token_beg;
}
- }
- }
- # if we are at a closing token of some type..
- if ( $type_beg =~ /^[\)\}\]R]$/ ) {
+ if ( $line == 1 && $i == $ibeg ) {
+ $do_not_pad = 1;
+ }
- # get the indentation of the line containing the corresponding
- # opening token
- (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- )
- = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
- $ri_last, $rindentation_list );
+ # Ok, let's see what the error is and try to fix it
+ my $actual_pos;
+ my $predicted_pos = $indentation->get_spaces();
+ if ( $i > $ibeg ) {
- # First set the default behavior:
- if (
+ # token is mid-line - use length to previous token
+ $actual_pos = total_line_length( $ibeg, $i - 1 );
- # default behavior is to outdent closing lines
- # of the form: "); }; ]; )->xxx;"
- $is_semicolon_terminated
+ # for mid-line token, we must check to see if all
+ # additional lines have continuation indentation,
+ # and remove it if so. Otherwise, we do not get
+ # good alignment.
+ my $closing_index = $indentation->get_closed();
+ if ( $closing_index > $iend ) {
+ my $ibeg_next = $ri_first->[ $line + 1 ];
+ if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
+ $self->undo_lp_ci( $line, $i, $closing_index,
+ $ri_first, $ri_last );
+ }
+ }
+ }
+ elsif ( $line > 0 ) {
- # and 'cuddled parens' of the form: ")->pack("
- # Bug fix for RT #123749]: the types here were
- # incorrectly '(' and ')'. Corrected to be '{' and '}'
- || (
- $terminal_type eq '{'
- && $type_beg eq '}'
- && ( $nesting_depth_to_go[$iend] + 1 ==
- $nesting_depth_to_go[$ibeg] )
- )
+ # handle case where token starts a new line;
+ # use length of previous line
+ my $ibegm = $ri_first->[ $line - 1 ];
+ my $iendm = $ri_last->[ $line - 1 ];
+ $actual_pos = total_line_length( $ibegm, $iendm );
- # remove continuation indentation for any line like
- # } ... {
- # or without ending '{' and unbalanced, such as
- # such as '}->{$operator}'
- || (
- $type_beg eq '}'
+ # follow -pt style
+ ++$actual_pos
+ if ( $types_to_go[ $iendm + 1 ] eq 'b' );
+ }
+ else {
- && ( $types_to_go[$iend] eq '{'
- || $levels_to_go[$iend] < $levels_to_go[$ibeg] )
- )
+ # token is first character of first line of batch
+ $actual_pos = $predicted_pos;
+ }
- # and when the next line is at a lower indentation level
- # PATCH: and only if the style allows undoing continuation
- # for all closing token types. We should really wait until
- # the indentation of the next line is known and then make
- # a decision, but that would require another pass.
- || ( $level_jump < 0 && !$some_closing_token_indentation )
+ my $move_right = $actual_pos - $predicted_pos;
- # Patch for -wn=2, multiple welded closing tokens
- || ( $i_terminal > $ibeg
- && $types_to_go[$iend] =~ /^[\)\}\]R]$/ )
+ # done if no error to correct (gnu2.t)
+ if ( $move_right == 0 ) {
+ $indentation->set_recoverable_spaces($move_right);
+ next;
+ }
- )
- {
- $adjust_indentation = 1;
- }
+ # if we have not seen closure for this indentation in
+ # this batch, we can only pass on a request to the
+ # vertical aligner
+ my $closing_index = $indentation->get_closed();
- # outdent something like '),'
- if (
- $terminal_type eq ','
+ if ( $closing_index < 0 ) {
+ $indentation->set_recoverable_spaces($move_right);
+ next;
+ }
- # Removed this constraint for -wn
- # OLD: allow just one character before the comma
- # && $i_terminal == $ibeg + 1
+ # If necessary, look ahead to see if there is really any
+ # leading whitespace dependent on this whitespace, and
+ # also find the longest line using this whitespace.
+ # Since it is always safe to move left if there are no
+ # dependents, we only need to do this if we may have
+ # dependent nodes or need to move right.
- # require LIST environment; otherwise, we may outdent too much -
- # this can happen in calls without parentheses (overload.t);
- && $container_environment_to_go[$i_terminal] eq 'LIST'
- )
- {
- $adjust_indentation = 1;
- }
+ my $right_margin = 0;
+ my $have_child = $indentation->get_have_child();
- # undo continuation indentation of a terminal closing token if
- # it is the last token before a level decrease. This will allow
- # a closing token to line up with its opening counterpart, and
- # avoids an indentation jump larger than 1 level.
- if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
- && $i_terminal == $ibeg
- && defined($K_beg) )
- {
- my $K_next_nonblank = $self->K_next_code($K_beg);
+ my %saw_indentation;
+ my $line_count = 1;
+ $saw_indentation{$indentation} = $indentation;
- # Patch for RT#131115: honor -bli flag at closing brace
- my $is_bli =
- $rOpts->{'brace-left-and-indent'}
- && $block_type_to_go[$i_terminal]
- && $block_type_to_go[$i_terminal] =~ /$bli_pattern/;
+ if ( $have_child || $move_right > 0 ) {
+ $have_child = 0;
+ my $max_length = 0;
+ if ( $i == $ibeg ) {
+ $max_length = total_line_length( $ibeg, $iend );
+ }
- if ( !$is_bli && defined($K_next_nonblank) ) {
- my $lev = $rLL->[$K_beg]->[_LEVEL_];
- my $level_next = $rLL->[$K_next_nonblank]->[_LEVEL_];
- $adjust_indentation = 1 if ( $level_next < $lev );
+ # look ahead at the rest of the lines of this batch..
+ foreach my $line_t ( $line + 1 .. $max_line ) {
+ my $ibeg_t = $ri_first->[$line_t];
+ my $iend_t = $ri_last->[$line_t];
+ last if ( $closing_index <= $ibeg_t );
+
+ # remember all different indentation objects
+ my $indentation_t = $leading_spaces_to_go[$ibeg_t];
+ $saw_indentation{$indentation_t} = $indentation_t;
+ $line_count++;
+
+ # remember longest line in the group
+ my $length_t = total_line_length( $ibeg_t, $iend_t );
+ if ( $length_t > $max_length ) {
+ $max_length = $length_t;
+ }
+ }
+ $right_margin = maximum_line_length($ibeg) - $max_length;
+ if ( $right_margin < 0 ) { $right_margin = 0 }
}
- # Patch for RT #96101, in which closing brace of anonymous subs
- # was not outdented. We should look ahead and see if there is
- # a level decrease at the next token (i.e., a closing token),
- # but right now we do not have that information. For now
- # we see if we are in a list, and this works well.
- # See test files 'sub*.t' for good test cases.
- if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/
- && $container_environment_to_go[$i_terminal] eq 'LIST'
- && !$rOpts->{'indent-closing-brace'} )
- {
- (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- )
- = $self->get_opening_indentation( $ibeg, $ri_first,
- $ri_last, $rindentation_list );
- my $indentation = $leading_spaces_to_go[$ibeg];
- if ( defined($opening_indentation)
- && get_spaces($indentation) >
- get_spaces($opening_indentation) )
- {
- $adjust_indentation = 1;
- }
- }
- }
-
- # YVES patch 1 of 2:
- # Undo ci of line with leading closing eval brace,
- # but not beyond the indention of the line with
- # the opening brace.
- if ( $block_type_to_go[$ibeg] eq 'eval'
- && !$rOpts->{'line-up-parentheses'}
- && !$rOpts->{'indent-closing-brace'} )
- {
- (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- )
- = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
- $rindentation_list );
- my $indentation = $leading_spaces_to_go[$ibeg];
- if ( defined($opening_indentation)
- && get_spaces($indentation) >
- get_spaces($opening_indentation) )
- {
- $adjust_indentation = 1;
- }
- }
+ my $first_line_comma_count =
+ grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
+ my $comma_count = $indentation->get_comma_count();
+ my $arrow_count = $indentation->get_arrow_count();
- $default_adjust_indentation = $adjust_indentation;
+ # This is a simple approximate test for vertical alignment:
+ # if we broke just after an opening paren, brace, bracket,
+ # and there are 2 or more commas in the first line,
+ # and there are no '=>'s,
+ # then we are probably vertically aligned. We could set
+ # an exact flag in sub scan_list, but this is good
+ # enough.
+ my $indentation_count = keys %saw_indentation;
+ my $is_vertically_aligned =
+ ( $i == $ibeg
+ && $first_line_comma_count > 1
+ && $indentation_count == 1
+ && ( $arrow_count == 0 || $arrow_count == $line_count ) );
- # Now modify default behavior according to user request:
- # handle option to indent non-blocks of the form ); }; ];
- # But don't do special indentation to something like ')->pack('
- if ( !$block_type_to_go[$ibeg] ) {
+ # Make the move if possible ..
+ if (
- # Note that logical padding has already been applied, so we may
- # need to remove some spaces to get a valid hash key.
- my $tok = $tokens_to_go[$ibeg];
- my $cti = $closing_token_indentation{$tok};
- if ( !defined($cti) ) {
+ # we can always move left
+ $move_right < 0
- # $cti may not be defined for several reasons.
- # -padding may have been applied so the character
- # has a length > 1
- # - we may have welded to a closing quote token.
- # Here is an example (perltidy -wn):
- # __PACKAGE__->load_components( qw(
- # > Core
- # >
- # > ) );
- $adjust_indentation = 0;
+ # but we should only move right if we are sure it will
+ # not spoil vertical alignment
+ || ( $comma_count == 0 )
+ || ( $comma_count > 0 && !$is_vertically_aligned )
+ )
+ {
+ my $move =
+ ( $move_right <= $right_margin )
+ ? $move_right
+ : $right_margin;
- }
- elsif ( $cti == 1 ) {
- if ( $i_terminal <= $ibeg + 1
- || $is_semicolon_terminated )
- {
- $adjust_indentation = 2;
- }
- else {
- $adjust_indentation = 0;
- }
- }
- elsif ( $cti == 2 ) {
- if ($is_semicolon_terminated) {
- $adjust_indentation = 3;
- }
- else {
- $adjust_indentation = 0;
+ foreach ( keys %saw_indentation ) {
+ $saw_indentation{$_}
+ ->permanently_decrease_available_spaces( -$move );
}
}
- elsif ( $cti == 3 ) {
- $adjust_indentation = 3;
- }
- }
- # handle option to indent blocks
- else {
- if (
- $rOpts->{'indent-closing-brace'}
- && (
- $i_terminal == $ibeg # isolated terminal '}'
- || $is_semicolon_terminated
- )
- ) # } xxxx ;
- {
- $adjust_indentation = 3;
+ # Otherwise, record what we want and the vertical aligner
+ # will try to recover it.
+ else {
+ $indentation->set_recoverable_spaces($move_right);
}
}
}
+ }
+ return $do_not_pad;
+}
- # if at ');', '};', '>;', and '];' of a terminal qw quote
- elsif ($rpatterns->[0] =~ /^qb*;$/
- && $rfields->[0] =~ /^([\)\}\]\>]);$/ )
- {
- if ( $closing_token_indentation{$1} == 0 ) {
- $adjust_indentation = 1;
- }
- else {
- $adjust_indentation = 3;
- }
- }
+sub undo_lp_ci {
- # if line begins with a ':', align it with any
- # previous line leading with corresponding ?
- elsif ( $types_to_go[$ibeg] eq ':' ) {
- (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- )
- = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
- $rindentation_list );
- if ($is_leading) { $adjust_indentation = 2; }
- }
+ # If there is a single, long parameter within parens, like this:
+ #
+ # $self->command( "/msg "
+ # . $infoline->chan
+ # . " You said $1, but did you know that it's square was "
+ # . $1 * $1 . " ?" );
+ #
+ # we can remove the continuation indentation of the 2nd and higher lines
+ # to achieve this effect, which is more pleasing:
+ #
+ # $self->command("/msg "
+ # . $infoline->chan
+ # . " You said $1, but did you know that it's square was "
+ # . $1 * $1 . " ?");
- ##########################################################
- # Section 2: set indentation according to flag set above
- #
- # Select the indentation object to define leading
- # whitespace. If we are outdenting something like '} } );'
- # then we want to use one level below the last token
- # ($i_terminal) in order to get it to fully outdent through
- # all levels.
- ##########################################################
- my $indentation;
- my $lev;
- my $level_end = $levels_to_go[$iend];
+ my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) =
+ @_;
+ my $max_line = @{$ri_first} - 1;
- if ( $adjust_indentation == 0 ) {
- $indentation = $leading_spaces_to_go[$ibeg];
- $lev = $levels_to_go[$ibeg];
- }
- elsif ( $adjust_indentation == 1 ) {
+ # must be multiple lines
+ return unless $max_line > $line_open;
- # Change the indentation to be that of a different token on the line
- # Previously, the indentation of the terminal token was used:
- # OLD CODING:
- # $indentation = $reduced_spaces_to_go[$i_terminal];
- # $lev = $levels_to_go[$i_terminal];
+ my $lev_start = $levels_to_go[$i_start];
+ my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
- # Generalization for MOJO:
- # Use the lowest level indentation of the tokens on the line.
- # For example, here we can use the indentation of the ending ';':
- # } until ($selection > 0 and $selection < 10); # ok to use ';'
- # But this will not outdent if we use the terminal indentation:
- # )->then( sub { # use indentation of the ->, not the {
- # Warning: reduced_spaces_to_go[] may be a reference, do not
- # do numerical checks with it
+ # see if all additional lines in this container have continuation
+ # indentation
+ my $n;
+ my $line_1 = 1 + $line_open;
+ for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
+ my $ibeg = $ri_first->[$n];
+ my $iend = $ri_last->[$n];
+ if ( $ibeg eq $closing_index ) { $n--; last }
+ return if ( $lev_start != $levels_to_go[$ibeg] );
+ return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
+ last if ( $closing_index <= $iend );
+ }
- my $i_ind = $ibeg;
- $indentation = $reduced_spaces_to_go[$i_ind];
- $lev = $levels_to_go[$i_ind];
- while ( $i_ind < $i_terminal ) {
- $i_ind++;
- if ( $levels_to_go[$i_ind] < $lev ) {
- $indentation = $reduced_spaces_to_go[$i_ind];
- $lev = $levels_to_go[$i_ind];
- }
- }
- }
+ # we can reduce the indentation of all continuation lines
+ my $continuation_line_count = $n - $line_open;
+ @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
+ (0) x ($continuation_line_count);
+ @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
+ @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
+ return;
+}
- # handle indented closing token which aligns with opening token
- elsif ( $adjust_indentation == 2 ) {
+###############################################
+# CODE SECTION 10: Code to break long statments
+###############################################
- # handle option to align closing token with opening token
- $lev = $levels_to_go[$ibeg];
+sub set_continuation_breaks {
- # calculate spaces needed to align with opening token
- my $space_count =
- get_spaces($opening_indentation) + $opening_offset;
+ # Called once per batch to set breaks in long lines.
- # Indent less than the previous line.
- #
- # Problem: For -lp we don't exactly know what it was if there
- # were recoverable spaces sent to the aligner. A good solution
- # would be to force a flush of the vertical alignment buffer, so
- # that we would know. For now, this rule is used for -lp:
- #
- # When the last line did not start with a closing token we will
- # be optimistic that the aligner will recover everything wanted.
- #
- # This rule will prevent us from breaking a hierarchy of closing
- # tokens, and in a worst case will leave a closing paren too far
- # indented, but this is better than frequently leaving it not
- # indented enough.
- my $last_spaces = get_spaces($last_indentation_written);
- if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
- $last_spaces +=
- get_recoverable_spaces($last_indentation_written);
- }
+ # Define an array of indexes for inserting newline characters to
+ # keep the line lengths below the maximum desired length. There is
+ # an implied break after the last token, so it need not be included.
- # reset the indentation to the new space count if it works
- # only options are all or none: nothing in-between looks good
- $lev = $levels_to_go[$ibeg];
- if ( $space_count < $last_spaces ) {
- if ($rOpts_line_up_parentheses) {
- my $lev = $levels_to_go[$ibeg];
- $indentation =
- new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
- }
- else {
- $indentation = $space_count;
- }
- }
+ # Method:
+ # This routine is part of series of routines which adjust line
+ # lengths. It is only called if a statement is longer than the
+ # maximum line length, or if a preliminary scanning located
+ # desirable break points. Sub scan_list has already looked at
+ # these tokens and set breakpoints (in array
+ # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
+ # after commas, after opening parens, and before closing parens).
+ # This routine will honor these breakpoints and also add additional
+ # breakpoints as necessary to keep the line length below the maximum
+ # requested. It bases its decision on where the 'bond strength' is
+ # lowest.
- # revert to default if it doesn't work
- else {
- $space_count = leading_spaces_to_go($ibeg);
- if ( $default_adjust_indentation == 0 ) {
- $indentation = $leading_spaces_to_go[$ibeg];
- }
- elsif ( $default_adjust_indentation == 1 ) {
- $indentation = $reduced_spaces_to_go[$i_terminal];
- $lev = $levels_to_go[$i_terminal];
- }
- }
- }
+ # Output: returns references to the arrays:
+ # @i_first
+ # @i_last
+ # which contain the indexes $i of the first and last tokens on each
+ # line.
- # Full indentaion of closing tokens (-icb and -icp or -cti=2)
- else {
+ # In addition, the array:
+ # $forced_breakpoint_to_go[$i]
+ # may be updated to be =1 for any index $i after which there must be
+ # a break. This signals later routines not to undo the breakpoint.
- # handle -icb (indented closing code block braces)
- # Updated method for indented block braces: indent one full level if
- # there is no continuation indentation. This will occur for major
- # structures such as sub, if, else, but not for things like map
- # blocks.
- #
- # Note: only code blocks without continuation indentation are
- # handled here (if, else, unless, ..). In the following snippet,
- # the terminal brace of the sort block will have continuation
- # indentation as shown so it will not be handled by the coding
- # here. We would have to undo the continuation indentation to do
- # this, but it probably looks ok as is. This is a possible future
- # update for semicolon terminated lines.
- #
- # if ($sortby eq 'date' or $sortby eq 'size') {
- # @files = sort {
- # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
- # or $a cmp $b
- # } @files;
- # }
- #
- if ( $block_type_to_go[$ibeg]
- && $ci_levels_to_go[$i_terminal] == 0 )
- {
- my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
- $indentation = $spaces + $rOpts_indent_columns;
+ my ( $self, $saw_good_break ) = @_;
+ my $DEBUG_BREAKPOINTS = 0;
- # NOTE: for -lp we could create a new indentation object, but
- # there is probably no need to do it
- }
+ my @i_first = (); # the first index to output
+ my @i_last = (); # the last index to output
+ my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
+ if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
- # handle -icp and any -icb block braces which fall through above
- # test such as the 'sort' block mentioned above.
- else {
+ my $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
- # There are currently two ways to handle -icp...
- # One way is to use the indentation of the previous line:
- # $indentation = $last_indentation_written;
+ $self->set_bond_strengths();
- # The other way is to use the indentation that the previous line
- # would have had if it hadn't been adjusted:
- $indentation = $last_unadjusted_indentation;
+ my $imin = 0;
+ my $imax = $max_index_to_go;
+ if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
+ if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
+ my $i_begin = $imin; # index for starting next iteration
- # Current method: use the minimum of the two. This avoids
- # inconsistent indentation.
- if ( get_spaces($last_indentation_written) <
- get_spaces($indentation) )
- {
- $indentation = $last_indentation_written;
- }
- }
+ my $leading_spaces = leading_spaces_to_go($imin);
+ my $line_count = 0;
+ my $last_break_strength = NO_BREAK;
+ my $i_last_break = -1;
+ my $max_bias = 0.001;
+ my $tiny_bias = 0.0001;
+ my $leading_alignment_token = "";
+ my $leading_alignment_type = "";
- # use previous indentation but use own level
- # to cause list to be flushed properly
- $lev = $levels_to_go[$ibeg];
- }
+ # see if any ?/:'s are in order
+ my $colons_in_order = 1;
+ my $last_tok = "";
+ my @colon_list = grep { /^[\?\:]$/ } @types_to_go[ 0 .. $max_index_to_go ];
+ my $colon_count = @colon_list;
+ foreach (@colon_list) {
+ if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
+ $last_tok = $_;
+ }
- # remember indentation except for multi-line quotes, which get
- # no indentation
- unless ( $ibeg == 0 && $starting_in_quote ) {
- $last_indentation_written = $indentation;
- $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
- $last_leading_token = $tokens_to_go[$ibeg];
- }
+ # This is a sufficient but not necessary condition for colon chain
+ my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
- # be sure lines with leading closing tokens are not outdented more
- # than the line which contained the corresponding opening token.
+ #-------------------------------------------------------
+ # BEGINNING of main loop to set continuation breakpoints
+ # Keep iterating until we reach the end
+ #-------------------------------------------------------
+ while ( $i_begin <= $imax ) {
+ my $lowest_strength = NO_BREAK;
+ my $starting_sum = $summed_lengths_to_go[$i_begin];
+ my $i_lowest = -1;
+ my $i_test = -1;
+ my $lowest_next_token = '';
+ my $lowest_next_type = 'b';
+ my $i_lowest_next_nonblank = -1;
- #############################################################
- # updated per bug report in alex_bug.pl: we must not
- # mess with the indentation of closing logical braces so
- # we must treat something like '} else {' as if it were
- # an isolated brace
- #############################################################
- my $is_isolated_block_brace = $block_type_to_go[$ibeg]
- && ( $i_terminal == $ibeg
- || $is_if_elsif_else_unless_while_until_for_foreach{
- $block_type_to_go[$ibeg] } );
+ #-------------------------------------------------------
+ # BEGINNING of inner loop to find the best next breakpoint
+ #-------------------------------------------------------
+ my $strength = NO_BREAK;
+ for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
+ my $type = $types_to_go[$i_test];
+ my $token = $tokens_to_go[$i_test];
+ my $next_type = $types_to_go[ $i_test + 1 ];
+ my $next_token = $tokens_to_go[ $i_test + 1 ];
+ my $i_next_nonblank = $inext_to_go[$i_test];
+ my $next_nonblank_type = $types_to_go[$i_next_nonblank];
+ my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+ my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
+ my $maximum_line_length = maximum_line_length($i_begin);
- # only do this for a ':; which is aligned with its leading '?'
- my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
+ # adjustments to the previous bond strength may have been made, and
+ # we must keep the bond strength of a token and its following blank
+ # the same;
+ my $last_strength = $strength;
+ $strength = $bond_strength_to_go[$i_test];
+ if ( $type eq 'b' ) { $strength = $last_strength }
- if (
- defined($opening_indentation)
- && !$leading_paren_arrow # MOJO
- && !$is_isolated_block_brace
- && !$is_unaligned_colon
- )
- {
- if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
- $indentation = $opening_indentation;
- }
- }
+ # use old breaks as a tie-breaker. For example to
+ # prevent blinkers with -pbp in this code:
- # remember the indentation of each line of this batch
- push @{$rindentation_list}, $indentation;
+##@keywords{
+## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/}
+## = ();
- # outdent lines with certain leading tokens...
- if (
+ # At the same time try to prevent a leading * in this code
+ # with the default formatting:
+ #
+## return
+## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 )
+## * ( $x**( $a - 1 ) )
+## * ( ( 1 - $x )**( $b - 1 ) );
- # must be first word of this batch
- $ibeg == 0
+ # reduce strength a bit to break ties at an old breakpoint ...
+ if (
+ $old_breakpoint_to_go[$i_test]
- # and ...
- && (
+ # which is a 'good' breakpoint, meaning ...
+ # we don't want to break before it
+ && !$want_break_before{$type}
- # certain leading keywords if requested
- (
- $rOpts->{'outdent-keywords'}
- && $types_to_go[$ibeg] eq 'k'
- && $outdent_keyword{ $tokens_to_go[$ibeg] }
- )
+ # and either we want to break before the next token
+ # or the next token is not short (i.e. not a '*', '/' etc.)
+ && $i_next_nonblank <= $imax
+ && ( $want_break_before{$next_nonblank_type}
+ || $token_lengths_to_go[$i_next_nonblank] > 2
+ || $next_nonblank_type =~ /^[\,\(\[\{L]$/ )
+ )
+ {
+ $strength -= $tiny_bias;
+ }
- # or labels if requested
- || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
-
- # or static block comments if requested
- || ( $types_to_go[$ibeg] eq '#'
- && $rOpts->{'outdent-static-block-comments'}
- && $is_static_block_comment )
- )
- )
-
- {
- my $space_count = leading_spaces_to_go($ibeg);
- if ( $space_count > 0 ) {
- $space_count -= $rOpts_continuation_indentation;
- $is_outdented_line = 1;
- if ( $space_count < 0 ) { $space_count = 0 }
-
- # do not promote a spaced static block comment to non-spaced;
- # this is not normally necessary but could be for some
- # unusual user inputs (such as -ci = -i)
- if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
- $space_count = 1;
- }
-
- if ($rOpts_line_up_parentheses) {
- $indentation =
- new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
- }
- else {
- $indentation = $space_count;
+ # otherwise increase strength a bit if this token would be at the
+ # maximum line length. This is necessary to avoid blinking
+ # in the above example when the -iob flag is added.
+ else {
+ my $len =
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_test + 1 ] -
+ $starting_sum;
+ if ( $len >= $maximum_line_length ) {
+ $strength += $tiny_bias;
}
}
- }
- return ( $indentation, $lev, $level_end, $terminal_type,
- $terminal_block_type, $is_semicolon_terminated,
- $is_outdented_line );
- }
-} ## end closure set_adjusted_indentation
-
-sub mate_index_to_go {
- my ( $self, $i ) = @_;
-
- # Return the matching index of a container or ternary pair
- # This is equivalent to the array @mate_index_to_go
- my $K = $K_to_go[$i];
- my $K_mate = $self->K_mate_index($K);
- my $i_mate = -1;
- if ( defined($K_mate) ) {
- $i_mate = $i + ( $K_mate - $K );
- if ( $i_mate < 0 || $i_mate > $max_index_to_go ) {
- $i_mate = -1;
- }
- }
- my $i_mate_alt = $mate_index_to_go[$i];
-
- # FIXME: Old Debug code which can be removed eventually
- if ( 0 && $i_mate_alt != $i_mate ) {
- my $tok = $tokens_to_go[$i];
- my $type = $types_to_go[$i];
- my $tok_mate = '*';
- my $type_mate = '*';
- if ( $i_mate >= 0 && $i_mate <= $max_index_to_go ) {
- $tok_mate = $tokens_to_go[$i_mate];
- $type_mate = $types_to_go[$i_mate];
- }
- my $seq = $type_sequence_to_go[$i];
- my $file = get_input_stream_name();
-
- Warn(
-"mate_index: file '$file': i=$i, imate=$i_mate, should be $i_mate_alt, K=$K, K_mate=$K_mate\ntype=$type, tok=$tok, seq=$seq, max=$max_index_to_go, tok_mate=$tok_mate, type_mate=$type_mate"
- );
- }
- return $i_mate;
-}
-
-sub K_mate_index {
-
- # Given the index K of an opening or closing container, or ?/: ternary pair,
- # return the index K of the other member of the pair.
- my ( $self, $K ) = @_;
- return unless defined($K);
- my $rLL = $self->[_rLL_];
- my $seqno = $rLL->[$K]->[_TYPE_SEQUENCE_];
- return unless ($seqno);
-
- my $K_opening = $self->[_K_opening_container_]->{$seqno};
- if ( defined($K_opening) ) {
- if ( $K != $K_opening ) { return $K_opening }
- return $self->[_K_closing_container_]->{$seqno};
- }
-
- $K_opening = $self->[_K_opening_ternary_]->{$seqno};
- if ( defined($K_opening) ) {
- if ( $K != $K_opening ) { return $K_opening }
- return $self->[_K_closing_ternary_]->{$seqno};
- }
- return;
-}
-
-sub set_vertical_tightness_flags {
-
- my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last,
- $ending_in_quote, $closing_side_comment )
- = @_;
+ my $must_break = 0;
- # Define vertical tightness controls for the nth line of a batch.
- # We create an array of parameters which tell the vertical aligner
- # if we should combine this line with the next line to achieve the
- # desired vertical tightness. The array of parameters contains:
- #
- # [0] type: 1=opening non-block 2=closing non-block
- # 3=opening block brace 4=closing block brace
- #
- # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
- # if closing: spaces of padding to use
- # [2] sequence number of container
- # [3] valid flag: do not append if this flag is false. Will be
- # true if appropriate -vt flag is set. Otherwise, Will be
- # made true only for 2 line container in parens with -lp
- #
- # These flags are used by sub set_leading_whitespace in
- # the vertical aligner
+ # Force an immediate break at certain operators
+ # with lower level than the start of the line,
+ # unless we've already seen a better break.
+ #
+ ##############################################
+ # Note on an issue with a preceding ?
+ ##############################################
+ # We don't include a ? in the above list, but there may
+ # be a break at a previous ? if the line is long.
+ # Because of this we do not want to force a break if
+ # there is a previous ? on this line. For now the best way
+ # to do this is to not break if we have seen a lower strength
+ # point, which is probably a ?.
+ #
+ # Example of unwanted breaks we are avoiding at a '.' following a ?
+ # from pod2html using perltidy -gnu:
+ # )
+ # ? "\n<A NAME=\""
+ # . $value
+ # . "\">\n$text</A>\n"
+ # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
+ if (
+ (
+ $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
+ || ( $next_nonblank_type eq 'k'
+ && $next_nonblank_token =~ /^(and|or)$/ )
+ )
+ && ( $nesting_depth_to_go[$i_begin] >
+ $nesting_depth_to_go[$i_next_nonblank] )
+ && ( $strength <= $lowest_strength )
+ )
+ {
+ $self->set_forced_breakpoint($i_next_nonblank);
+ }
- my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
+ if (
- my $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
- my $rOpts_block_brace_vertical_tightness =
- $rOpts->{'block-brace-vertical-tightness'};
- my $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
+ # Try to put a break where requested by scan_list
+ $forced_breakpoint_to_go[$i_test]
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 1:
- # Handle Lines 1 .. n-1 but not the last line
- # For non-BLOCK tokens, we will need to examine the next line
- # too, so we won't consider the last line.
- #--------------------------------------------------------------
- if ( $n < $n_last_line ) {
+ # break between ) { in a continued line so that the '{' can
+ # be outdented
+ # See similar logic in scan_list which catches instances
+ # where a line is just something like ') {'. We have to
+ # be careful because the corresponding block keyword might
+ # not be on the first line, such as 'for' here:
+ #
+ # eval {
+ # for ("a") {
+ # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
+ # }
+ # };
+ #
+ || (
+ $line_count
+ && ( $token eq ')' )
+ && ( $next_nonblank_type eq '{' )
+ && ($next_nonblank_block_type)
+ && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 1a:
- # Look for Type 1, last token of this line is a non-block opening token
- #--------------------------------------------------------------
- my $ibeg_next = $ri_first->[ $n + 1 ];
- my $token_end = $tokens_to_go[$iend];
- my $iend_next = $ri_last->[ $n + 1 ];
- if (
- $type_sequence_to_go[$iend]
- && !$block_type_to_go[$iend]
- && $is_opening_token{$token_end}
- && (
- $opening_vertical_tightness{$token_end} > 0
+ # RT #104427: Dont break before opening sub brace because
+ # sub block breaks handled at higher level, unless
+ # it looks like the preceding list is long and broken
+ && !(
+ $next_nonblank_block_type =~ /$ANYSUB_PATTERN/
+ && ( $nesting_depth_to_go[$i_begin] ==
+ $nesting_depth_to_go[$i_next_nonblank] )
+ )
- # allow 2-line method call to be closed up
- || ( $rOpts_line_up_parentheses
- && $token_end eq '('
- && $iend > $ibeg
- && $types_to_go[ $iend - 1 ] ne 'b' )
- )
- )
- {
+ && !$rOpts->{'opening-brace-always-on-right'}
+ )
- # avoid multiple jumps in nesting depth in one line if
- # requested
- my $ovt = $opening_vertical_tightness{$token_end};
- my $iend_next = $ri_last->[ $n + 1 ];
- unless (
- $ovt < 2
- && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
- $nesting_depth_to_go[$ibeg_next] )
+ # There is an implied forced break at a terminal opening brace
+ || ( ( $type eq '{' ) && ( $i_test == $imax ) )
)
{
- # If -vt flag has not been set, mark this as invalid
- # and aligner will validate it if it sees the closing paren
- # within 2 lines.
- my $valid_flag = $ovt;
- @{$rvertical_tightness_flags} =
- ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
+ # Forced breakpoints must sometimes be overridden, for example
+ # because of a side comment causing a NO_BREAK. It is easier
+ # to catch this here than when they are set.
+ if ( $strength < NO_BREAK - 1 ) {
+ $strength = $lowest_strength - $tiny_bias;
+ $must_break = 1;
+ }
}
- }
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 1b:
- # Look for Type 2, first token of next line is a non-block closing
- # token .. and be sure this line does not have a side comment
- #--------------------------------------------------------------
- my $token_next = $tokens_to_go[$ibeg_next];
- if ( $type_sequence_to_go[$ibeg_next]
- && !$block_type_to_go[$ibeg_next]
- && $is_closing_token{$token_next}
- && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen!
- {
- my $ovt = $opening_vertical_tightness{$token_next};
- my $cvt = $closing_vertical_tightness{$token_next};
+ # quit if a break here would put a good terminal token on
+ # the next line and we already have a possible break
if (
-
- # never append a trailing line like )->pack(
- # because it will throw off later alignment
- (
- $nesting_depth_to_go[$ibeg_next] ==
- $nesting_depth_to_go[ $iend_next + 1 ] + 1
- )
+ !$must_break
+ && ( $next_nonblank_type =~ /^[\;\,]$/ )
&& (
- $cvt == 2
- || (
- $container_environment_to_go[$ibeg_next] ne 'LIST'
- && (
- $cvt == 1
-
- # allow closing up 2-line method calls
- || ( $rOpts_line_up_parentheses
- && $token_next eq ')' )
- )
- )
+ (
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
+ $starting_sum
+ ) > $maximum_line_length
)
)
{
-
- # decide which trailing closing tokens to append..
- my $ok = 0;
- if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
- else {
- my $str = join( '',
- @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
-
- # append closing token if followed by comment or ';'
- if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
- }
-
- if ($ok) {
- my $valid_flag = $cvt;
- @{$rvertical_tightness_flags} = (
- 2,
- $tightness{$token_next} == 2 ? 0 : 1,
- $type_sequence_to_go[$ibeg_next], $valid_flag,
- );
- }
- }
- }
-
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 1c:
- # Implement the Opening Token Right flag (Type 2)..
- # If requested, move an isolated trailing opening token to the end of
- # the previous line which ended in a comma. We could do this
- # in sub recombine_breakpoints but that would cause problems
- # with -lp formatting. The problem is that indentation will
- # quickly move far to the right in nested expressions. By
- # doing it after indentation has been set, we avoid changes
- # to the indentation. Actual movement of the token takes place
- # in sub valign_output_step_B.
- #--------------------------------------------------------------
- if (
- $opening_token_right{ $tokens_to_go[$ibeg_next] }
-
- # previous line is not opening
- # (use -sot to combine with it)
- && !$is_opening_token{$token_end}
-
- # previous line ended in one of these
- # (add other cases if necessary; '=>' and '.' are not necessary
- && !$block_type_to_go[$ibeg_next]
-
- # this is a line with just an opening token
- && ( $iend_next == $ibeg_next
- || $iend_next == $ibeg_next + 2
- && $types_to_go[$iend_next] eq '#' )
-
- # looks bad if we align vertically with the wrong container
- && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
- )
- {
- my $valid_flag = 1;
- my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
- @{$rvertical_tightness_flags} =
- ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
- }
-
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 1d:
- # Stacking of opening and closing tokens (Type 2)
- #--------------------------------------------------------------
- my $stackable;
- my $token_beg_next = $tokens_to_go[$ibeg_next];
-
- # patch to make something like 'qw(' behave like an opening paren
- # (aran.t)
- if ( $types_to_go[$ibeg_next] eq 'q' ) {
- if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
- $token_beg_next = $1;
- }
- }
-
- if ( $is_closing_token{$token_end}
- && $is_closing_token{$token_beg_next} )
- {
- $stackable = $stack_closing_token{$token_beg_next}
- unless ( $block_type_to_go[$ibeg_next] )
- ; # shouldn't happen; just checking
- }
- elsif ($is_opening_token{$token_end}
- && $is_opening_token{$token_beg_next} )
- {
- $stackable = $stack_opening_token{$token_beg_next}
- unless ( $block_type_to_go[$ibeg_next] )
- ; # shouldn't happen; just checking
- }
-
- if ($stackable) {
-
- my $is_semicolon_terminated;
- if ( $n + 1 == $n_last_line ) {
- my ( $terminal_type, $i_terminal ) =
- $self->terminal_type_i( $ibeg_next, $iend_next );
- $is_semicolon_terminated = $terminal_type eq ';'
- && $nesting_depth_to_go[$iend_next] <
- $nesting_depth_to_go[$ibeg_next];
+ last if ( $i_lowest >= 0 );
}
- # this must be a line with just an opening token
- # or end in a semicolon
+ # Avoid a break which would strand a single punctuation
+ # token. For example, we do not want to strand a leading
+ # '.' which is followed by a long quoted string.
+ # But note that we do want to do this with -extrude (l=1)
+ # so please test any changes to this code on -extrude.
if (
- $is_semicolon_terminated
- || ( $iend_next == $ibeg_next
- || $iend_next == $ibeg_next + 2
- && $types_to_go[$iend_next] eq '#' )
+ !$must_break
+ && ( $i_test == $i_begin )
+ && ( $i_test < $imax )
+ && ( $token eq $type )
+ && (
+ (
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_test + 1 ] -
+ $starting_sum
+ ) < $maximum_line_length
+ )
)
{
- my $valid_flag = 1;
- my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
- @{$rvertical_tightness_flags} =
- ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
- );
+ $i_test = min( $imax, $inext_to_go[$i_test] );
+ redo;
}
- }
- }
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 2:
- # Handle type 3, opening block braces on last line of the batch
- # Check for a last line with isolated opening BLOCK curly
- #--------------------------------------------------------------
- elsif ($rOpts_block_brace_vertical_tightness
- && $ibeg eq $iend
- && $types_to_go[$iend] eq '{'
- && $block_type_to_go[$iend] =~
- /$block_brace_vertical_tightness_pattern/ )
- {
- @{$rvertical_tightness_flags} =
- ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
- }
+ if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
+ {
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 3:
- # Handle type 4, a closing block brace on the last line of the batch Check
- # for a last line with isolated closing BLOCK curly
- # Patch: added a check for any new closing side comment which the
- # -csc option may generate. If it exists, there will be a side comment
- # so we cannot combine with a brace on the next line. This issue
- # occurs for the combination -scbb and -csc is used.
- #--------------------------------------------------------------
- elsif ($rOpts_stack_closing_block_brace
- && $ibeg eq $iend
- && $block_type_to_go[$iend]
- && $types_to_go[$iend] eq '}'
- && ( !$closing_side_comment || $n < $n_last_line ) )
- {
- my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
- @{$rvertical_tightness_flags} =
- ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
- }
+ # break at previous best break if it would have produced
+ # a leading alignment of certain common tokens, and it
+ # is different from the latest candidate break
+ last
+ if ($leading_alignment_type);
+
+ # Force at least one breakpoint if old code had good
+ # break It is only called if a breakpoint is required or
+ # desired. This will probably need some adjustments
+ # over time. A goal is to try to be sure that, if a new
+ # side comment is introduced into formatted text, then
+ # the same breakpoints will occur. scbreak.t
+ last
+ if (
+ $i_test == $imax # we are at the end
+ && !get_forced_breakpoint_count()
+ && $saw_good_break # old line had good break
+ && $type =~ /^[#;\{]$/ # and this line ends in
+ # ';' or side comment
+ && $i_last_break < 0 # and we haven't made a break
+ && $i_lowest >= 0 # and we saw a possible break
+ && $i_lowest < $imax - 1 # (but not just before this ;)
+ && $strength - $lowest_strength < 0.5 * WEAK # and it's good
+ );
+
+ # Do not skip past an important break point in a short final
+ # segment. For example, without this check we would miss the
+ # break at the final / in the following code:
+ #
+ # $depth_stop =
+ # ( $tau * $mass_pellet * $q_0 *
+ # ( 1. - exp( -$t_stop / $tau ) ) -
+ # 4. * $pi * $factor * $k_ice *
+ # ( $t_melt - $t_ice ) *
+ # $r_pellet *
+ # $t_stop ) /
+ # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
+ #
+ if ( $line_count > 2
+ && $i_lowest < $i_test
+ && $i_test > $imax - 2
+ && $nesting_depth_to_go[$i_begin] >
+ $nesting_depth_to_go[$i_lowest]
+ && $lowest_strength < $last_break_strength - .5 * WEAK )
+ {
+ # Make this break for math operators for now
+ my $ir = $inext_to_go[$i_lowest];
+ my $il = $iprev_to_go[$ir];
+ last
+ if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
+ || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ );
+ }
- # pack in the sequence numbers of the ends of this line
- $rvertical_tightness_flags->[4] =
- $self->get_seqno( $ibeg, $ending_in_quote );
- $rvertical_tightness_flags->[5] =
- $self->get_seqno( $iend, $ending_in_quote );
- return $rvertical_tightness_flags;
-}
+ # Update the minimum bond strength location
+ $lowest_strength = $strength;
+ $i_lowest = $i_test;
+ $lowest_next_token = $next_nonblank_token;
+ $lowest_next_type = $next_nonblank_type;
+ $i_lowest_next_nonblank = $i_next_nonblank;
+ last if $must_break;
-sub get_seqno {
+ # set flags to remember if a break here will produce a
+ # leading alignment of certain common tokens
+ if ( $line_count > 0
+ && $i_test < $imax
+ && ( $lowest_strength - $last_break_strength <= $max_bias )
+ )
+ {
+ my $i_last_end = $iprev_to_go[$i_begin];
+ my $tok_beg = $tokens_to_go[$i_begin];
+ my $type_beg = $types_to_go[$i_begin];
+ if (
- # get opening and closing sequence numbers of a token for the vertical
- # aligner. Assign qw quotes a value to allow qw opening and closing tokens
- # to be treated somewhat like opening and closing tokens for stacking
- # tokens by the vertical aligner.
- my ( $self, $ii, $ending_in_quote ) = @_;
+ # check for leading alignment of certain tokens
+ (
+ $tok_beg eq $next_nonblank_token
+ && $is_chain_operator{$tok_beg}
+ && ( $type_beg eq 'k'
+ || $type_beg eq $tok_beg )
+ && $nesting_depth_to_go[$i_begin] >=
+ $nesting_depth_to_go[$i_next_nonblank]
+ )
- my $rLL = $self->[_rLL_];
- my $this_batch = $self->[_this_batch_];
- my $rK_to_go = $this_batch->[_rK_to_go_];
+ || ( $tokens_to_go[$i_last_end] eq $token
+ && $is_chain_operator{$token}
+ && ( $type eq 'k' || $type eq $token )
+ && $nesting_depth_to_go[$i_last_end] >=
+ $nesting_depth_to_go[$i_test] )
+ )
+ {
+ $leading_alignment_token = $next_nonblank_token;
+ $leading_alignment_type = $next_nonblank_type;
+ }
+ }
+ }
- my $KK = $rK_to_go->[$ii];
- my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ my $too_long = ( $i_test >= $imax );
+ if ( !$too_long ) {
+ my $next_length =
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_test + 2 ] -
+ $starting_sum;
+ $too_long = $next_length > $maximum_line_length;
- if ( $rLL->[$KK]->[_TYPE_] eq 'q' ) {
- my $SEQ_QW = -1;
- my $token = $rLL->[$KK]->[_TOKEN_];
- if ( $ii > 0 ) {
- $seqno = $SEQ_QW if ( $token =~ /^qw\s*[\(\{\[]/ );
- }
- else {
- if ( !$ending_in_quote ) {
- $seqno = $SEQ_QW if ( $token =~ /[\)\}\]]$/ );
+ # To prevent blinkers we will avoid leaving a token exactly at
+ # the line length limit unless it is the last token or one of
+ # several "good" types.
+ #
+ # The following code was a blinker with -pbp before this
+ # modification:
+## $last_nonblank_token eq '('
+## && $is_indirect_object_taker{ $paren_type
+## [$paren_depth] }
+ # The issue causing the problem is that if the
+ # term [$paren_depth] gets broken across a line then
+ # the whitespace routine doesn't see both opening and closing
+ # brackets and will format like '[ $paren_depth ]'. This
+ # leads to an oscillation in length depending if we break
+ # before the closing bracket or not.
+ if ( !$too_long
+ && $i_test + 1 < $imax
+ && $next_nonblank_type !~ /^[,\}\]\)R]$/ )
+ {
+ $too_long = $next_length >= $maximum_line_length;
+ }
}
- }
- }
- return ($seqno);
-}
-{ ## begin closure set_vertical_alignment_markers
- my %is_vertical_alignment_type;
- my %is_not_vertical_alignment_token;
- my %is_vertical_alignment_keyword;
- my %is_terminal_alignment_type;
- my %is_low_level_alignment_token;
+ $DEBUG_BREAKPOINTS
+ && do {
+ my $ltok = $token;
+ my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
+ my $i_testp2 = $i_test + 2;
+ if ( $i_testp2 > $max_index_to_go + 1 ) {
+ $i_testp2 = $max_index_to_go + 1;
+ }
+ if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
+ if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
+ print STDOUT
+"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
+ };
- BEGIN {
+ # allow one extra terminal token after exceeding line length
+ # if it would strand this token.
+ if ( $rOpts_fuzzy_line_length
+ && $too_long
+ && $i_lowest == $i_test
+ && $token_lengths_to_go[$i_test] > 1
+ && $next_nonblank_type =~ /^[\;\,]$/ )
+ {
+ $too_long = 0;
+ }
- my @q;
+ last
+ if (
+ ( $i_test == $imax ) # we're done if no more tokens,
+ || (
+ ( $i_lowest >= 0 ) # or no more space and we have a break
+ && $too_long
+ )
+ );
+ }
- # Replaced =~ and // in the list. // had been removed in RT 119588
- @q = qw#
- = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
- { ? : => && || ~~ !~~ =~ !~ // <=> ->
- #;
- @is_vertical_alignment_type{@q} = (1) x scalar(@q);
+ #-------------------------------------------------------
+ # END of inner loop to find the best next breakpoint
+ # Now decide exactly where to put the breakpoint
+ #-------------------------------------------------------
- # These 'tokens' are not aligned. We need this to remove [
- # from the above list because it has type ='{'
- @q = qw([);
- @is_not_vertical_alignment_token{@q} = (1) x scalar(@q);
+ # it's always ok to break at imax if no other break was found
+ if ( $i_lowest < 0 ) { $i_lowest = $imax }
- # these are the only types aligned at a line end
- @q = qw(&& || =>);
- @is_terminal_alignment_type{@q} = (1) x scalar(@q);
+ # semi-final index calculation
+ my $i_next_nonblank = $inext_to_go[$i_lowest];
+ my $next_nonblank_type = $types_to_go[$i_next_nonblank];
+ my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
- # these tokens only align at line level
- @q = ( '{', '(' );
- @is_low_level_alignment_token{@q} = (1) x scalar(@q);
+ #-------------------------------------------------------
+ # ?/: rule 1 : if a break here will separate a '?' on this
+ # line from its closing ':', then break at the '?' instead.
+ #-------------------------------------------------------
+ foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
+ next unless ( $tokens_to_go[$i] eq '?' );
- # eq and ne were removed from this list to improve alignment chances
- @q = qw(if unless and or err for foreach while until);
- @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
- }
+ # do not break if probable sequence of ?/: statements
+ next if ($is_colon_chain);
- sub set_vertical_alignment_markers {
+ # do not break if statement is broken by side comment
+ next
+ if ( $tokens_to_go[$max_index_to_go] eq '#'
+ && $self->terminal_type_i( 0, $max_index_to_go ) !~
+ /^[\;\}]$/ );
- # This routine takes the first step toward vertical alignment of the
- # lines of output text. It looks for certain tokens which can serve as
- # vertical alignment markers (such as an '=').
- #
- # Method: We look at each token $i in this output batch and set
- # $ralignment_type_to_go->[$i] equal to those tokens at which we would
- # accept vertical alignment.
+ # no break needed if matching : is also on the line
+ next
+ if ( $mate_index_to_go[$i] >= 0
+ && $mate_index_to_go[$i] <= $i_next_nonblank );
- my ( $self, $ri_first, $ri_last ) = @_;
- my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
+ $i_lowest = $i;
+ if ( $want_break_before{'?'} ) { $i_lowest-- }
+ last;
+ }
- my $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
- my $ralignment_type_to_go;
+ #-------------------------------------------------------
+ # END of inner loop to find the best next breakpoint:
+ # Break the line after the token with index i=$i_lowest
+ #-------------------------------------------------------
- # Initialize the alignment array. Note that closing side comments can
- # insert up to 2 additional tokens beyond the original
- # $max_index_to_go, so we need to check ri_last for the last index.
- my $max_line = @{$ri_first} - 1;
- my $iend = $ri_last->[$max_line];
- if ( $iend < $max_index_to_go ) { $iend = $max_index_to_go }
- for my $i ( 0 .. $iend ) {
- $ralignment_type_to_go->[$i] = '';
- }
+ # final index calculation
+ $i_next_nonblank = $inext_to_go[$i_lowest];
+ $next_nonblank_type = $types_to_go[$i_next_nonblank];
+ $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
- # nothing to do if we aren't allowed to change whitespace
- if ( !$rOpts_add_whitespace ) {
- return $ralignment_type_to_go;
- }
+ $DEBUG_BREAKPOINTS
+ && print STDOUT
+ "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
- # remember the index of last nonblank token before any sidecomment
- my $i_terminal = $max_index_to_go;
- if ( $types_to_go[$i_terminal] eq '#' ) {
- if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
- if ( $i_terminal > 0 ) { --$i_terminal }
- }
+ #-------------------------------------------------------
+ # ?/: rule 2 : if we break at a '?', then break at its ':'
+ #
+ # Note: this rule is also in sub scan_list to handle a break
+ # at the start and end of a line (in case breaks are dictated
+ # by side comments).
+ #-------------------------------------------------------
+ if ( $next_nonblank_type eq '?' ) {
+ $self->set_closing_breakpoint($i_next_nonblank);
+ }
+ elsif ( $types_to_go[$i_lowest] eq '?' ) {
+ $self->set_closing_breakpoint($i_lowest);
}
- # look at each line of this batch..
- my $last_vertical_alignment_before_index;
- my $vert_last_nonblank_type;
- my $vert_last_nonblank_token;
- my $vert_last_nonblank_block_type;
+ #-------------------------------------------------------
+ # ?/: rule 3 : if we break at a ':' then we save
+ # its location for further work below. We may need to go
+ # back and break at its '?'.
+ #-------------------------------------------------------
+ if ( $next_nonblank_type eq ':' ) {
+ push @i_colon_breaks, $i_next_nonblank;
+ }
+ elsif ( $types_to_go[$i_lowest] eq ':' ) {
+ push @i_colon_breaks, $i_lowest;
+ }
- foreach my $line ( 0 .. $max_line ) {
- my $ibeg = $ri_first->[$line];
- my $iend = $ri_last->[$line];
- $last_vertical_alignment_before_index = -1;
- $vert_last_nonblank_type = '';
- $vert_last_nonblank_token = '';
- $vert_last_nonblank_block_type = '';
+ # here we should set breaks for all '?'/':' pairs which are
+ # separated by this line
- # look at each token in this output line..
- my $level_beg = $levels_to_go[$ibeg];
- foreach my $i ( $ibeg .. $iend ) {
- my $alignment_type = '';
- my $type = $types_to_go[$i];
- my $block_type = $block_type_to_go[$i];
- my $token = $tokens_to_go[$i];
+ $line_count++;
- # do not align tokens at lower level then start of line
- # except for side comments
- if ( $levels_to_go[$i] < $levels_to_go[$ibeg]
- && $types_to_go[$i] ne '#' )
- {
- $ralignment_type_to_go->[$i] = '';
- next;
- }
+ # save this line segment, after trimming blanks at the ends
+ push( @i_first,
+ ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
+ push( @i_last,
+ ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
- #--------------------------------------------------------
- # First see if we want to align BEFORE this token
- #--------------------------------------------------------
+ # set a forced breakpoint at a container opening, if necessary, to
+ # signal a break at a closing container. Excepting '(' for now.
+ if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
+ && !$forced_breakpoint_to_go[$i_lowest] )
+ {
+ $self->set_closing_breakpoint($i_lowest);
+ }
- # The first possible token that we can align before
- # is index 2 because: 1) it doesn't normally make sense to
- # align before the first token and 2) the second
- # token must be a blank if we are to align before
- # the third
- if ( $i < $ibeg + 2 ) { }
+ # get ready to go again
+ $i_begin = $i_lowest + 1;
+ $last_break_strength = $lowest_strength;
+ $i_last_break = $i_lowest;
+ $leading_alignment_token = "";
+ $leading_alignment_type = "";
+ $lowest_next_token = '';
+ $lowest_next_type = 'b';
- # must follow a blank token
- elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
+ if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
+ $i_begin++;
+ }
- # align a side comment --
- elsif ( $type eq '#' ) {
+ # update indentation size
+ if ( $i_begin <= $imax ) {
+ $leading_spaces = leading_spaces_to_go($i_begin);
+ }
+ }
- my $KK = $K_to_go[$i];
- my $sc_type = $rspecial_side_comment_type->{$KK};
+ #-------------------------------------------------------
+ # END of main loop to set continuation breakpoints
+ # Now go back and make any necessary corrections
+ #-------------------------------------------------------
- unless (
+ #-------------------------------------------------------
+ # ?/: rule 4 -- if we broke at a ':', then break at
+ # corresponding '?' unless this is a chain of ?: expressions
+ #-------------------------------------------------------
+ if (@i_colon_breaks) {
- # it is any specially marked side comment
- $sc_type
+ # using a simple method for deciding if we are in a ?/: chain --
+ # this is a chain if it has multiple ?/: pairs all in order;
+ # otherwise not.
+ # Note that if line starts in a ':' we count that above as a break
+ my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
- # or it is a static side comment
- || ( $rOpts->{'static-side-comments'}
- && $token =~ /$static_side_comment_pattern/ )
+ unless ($is_chain) {
+ my @insert_list = ();
+ foreach (@i_colon_breaks) {
+ my $i_question = $mate_index_to_go[$_];
+ if ( $i_question >= 0 ) {
+ if ( $want_break_before{'?'} ) {
+ $i_question = $iprev_to_go[$i_question];
+ }
- # or a closing side comment
- || ( $vert_last_nonblank_block_type
- && $token =~
- /$closing_side_comment_prefix_pattern/ )
- )
- {
- $alignment_type = $type;
- } ## Example of a static side comment
+ if ( $i_question >= 0 ) {
+ push @insert_list, $i_question;
+ }
}
+ $self->insert_additional_breaks( \@insert_list, \@i_first,
+ \@i_last );
+ }
+ }
+ }
+ return ( \@i_first, \@i_last, $colon_count );
+}
- # otherwise, do not align two in a row to create a
- # blank field
- elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
+{ ## begin closure scan_list
- # align before one of these keywords
- # (within a line, since $i>1)
- elsif ( $type eq 'k' ) {
+ # These routines and variables are involved in finding good
+ # places to break long lists.
- # /^(if|unless|and|or|eq|ne)$/
- if ( $is_vertical_alignment_keyword{$token} ) {
- $alignment_type = $token;
- }
- }
+ my (
+ $block_type, $current_depth,
+ $depth, $i,
+ $i_last_nonblank_token, $last_colon_sequence_number,
+ $last_nonblank_token, $last_nonblank_type,
+ $last_nonblank_block_type, $last_old_breakpoint_count,
+ $minimum_depth, $next_nonblank_block_type,
+ $next_nonblank_token, $next_nonblank_type,
+ $old_breakpoint_count, $starting_breakpoint_count,
+ $starting_depth, $token,
+ $type, $type_sequence,
+ );
- # align before one of these types..
- # Note: add '.' after new vertical aligner is operational
- elsif ( $is_vertical_alignment_type{$type}
- && !$is_not_vertical_alignment_token{$token} )
- {
- $alignment_type = $token;
+ my (
+ @breakpoint_stack, @breakpoint_undo_stack,
+ @comma_index, @container_type,
+ @identifier_count_stack, @index_before_arrow,
+ @interrupted_list, @item_count_stack,
+ @last_comma_index, @last_dot_index,
+ @last_nonblank_type, @old_breakpoint_count_stack,
+ @opening_structure_index_stack, @rfor_semicolon_list,
+ @has_old_logical_breakpoints, @rand_or_list,
+ @i_equals,
+ );
- # Do not align a terminal token. Although it might
- # occasionally look ok to do this, this has been found to be
- # a good general rule. The main problems are:
- # (1) that the terminal token (such as an = or :) might get
- # moved far to the right where it is hard to see because
- # nothing follows it, and
- # (2) doing so may prevent other good alignments.
- # Current exceptions are && and || and =>
- if ( $i == $iend || $i >= $i_terminal ) {
- $alignment_type = ""
- unless ( $is_terminal_alignment_type{$type} );
- }
+ # these arrays must retain values between calls
+ my ( @has_broken_sublist, @dont_align, @want_comma_break );
- # Do not align leading ': (' or '. ('. This would prevent
- # alignment in something like the following:
- # $extra_space .=
- # ( $input_line_number < 10 ) ? " "
- # : ( $input_line_number < 100 ) ? " "
- # : "";
- # or
- # $code =
- # ( $case_matters ? $accessor : " lc($accessor) " )
- # . ( $yesno ? " eq " : " ne " )
+ sub initialize_scan_list {
+ @dont_align = ();
+ @has_broken_sublist = ();
+ @want_comma_break = ();
+ return;
+ }
- # Also, do not align a ( following a leading ? so we can
- # align something like this:
- # $converter{$_}->{ushortok} =
- # $PDL::IO::Pic::biggrays
- # ? ( m/GIF/ ? 0 : 1 )
- # : ( m/GIF|RAST|IFF/ ? 0 : 1 );
- if ( $i == $ibeg + 2
- && $types_to_go[$ibeg] =~ /^[\.\:\?]$/
- && $types_to_go[ $i - 1 ] eq 'b' )
- {
- $alignment_type = "";
- }
+ # routine to define essential variables when we go 'up' to
+ # a new depth
+ sub check_for_new_minimum_depth {
+ my $depth = shift;
+ if ( $depth < $minimum_depth ) {
- # Certain tokens only align at the same level as the
- # initial line level
- if ( $is_low_level_alignment_token{$token}
- && $levels_to_go[$i] != $level_beg )
- {
- $alignment_type = "";
- }
+ $minimum_depth = $depth;
- # For a paren after keyword, only align something like this:
- # if ( $a ) { &a }
- # elsif ( $b ) { &b }
- if ( $token eq '(' ) {
+ # these arrays need not retain values between calls
+ $breakpoint_stack[$depth] = $starting_breakpoint_count;
+ $container_type[$depth] = "";
+ $identifier_count_stack[$depth] = 0;
+ $index_before_arrow[$depth] = -1;
+ $interrupted_list[$depth] = 1;
+ $item_count_stack[$depth] = 0;
+ $last_nonblank_type[$depth] = "";
+ $opening_structure_index_stack[$depth] = -1;
+
+ $breakpoint_undo_stack[$depth] = undef;
+ $comma_index[$depth] = undef;
+ $last_comma_index[$depth] = undef;
+ $last_dot_index[$depth] = undef;
+ $old_breakpoint_count_stack[$depth] = undef;
+ $has_old_logical_breakpoints[$depth] = 0;
+ $rand_or_list[$depth] = [];
+ $rfor_semicolon_list[$depth] = [];
+ $i_equals[$depth] = -1;
+
+ # these arrays must retain values between calls
+ if ( !defined( $has_broken_sublist[$depth] ) ) {
+ $dont_align[$depth] = 0;
+ $has_broken_sublist[$depth] = 0;
+ $want_comma_break[$depth] = 0;
+ }
+ }
+ return;
+ }
- if ( $vert_last_nonblank_type eq 'k' ) {
- $alignment_type = ""
- unless $vert_last_nonblank_token =~
- /^(if|unless|elsif)$/;
- }
- }
+ # routine to decide which commas to break at within a container;
+ # returns:
+ # $bp_count = number of comma breakpoints set
+ # $do_not_break_apart = a flag indicating if container need not
+ # be broken open
+ sub set_comma_breakpoints {
- # be sure the alignment tokens are unique
- # This didn't work well: reason not determined
- # if ($token ne $type) {$alignment_type .= $type}
- }
+ my ( $self, $dd ) = @_;
+ my $bp_count = 0;
+ my $do_not_break_apart = 0;
- # NOTE: This is deactivated because it causes the previous
- # if/elsif alignment to fail
- #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
- #{ $alignment_type = $type; }
+ # anything to do?
+ if ( $item_count_stack[$dd] ) {
- if ($alignment_type) {
- $last_vertical_alignment_before_index = $i;
- }
+ # handle commas not in containers...
+ if ( $dont_align[$dd] ) {
+ $self->do_uncontained_comma_breaks($dd);
+ }
- #--------------------------------------------------------
- # Next see if we want to align AFTER the previous nonblank
- #--------------------------------------------------------
+ # handle commas within containers...
+ else {
+ my $fbc = get_forced_breakpoint_count();
- # We want to line up ',' and interior ';' tokens, with the added
- # space AFTER these tokens. (Note: interior ';' is included
- # because it may occur in short blocks).
- if (
+ # always open comma lists not preceded by keywords,
+ # barewords, identifiers (that is, anything that doesn't
+ # look like a function call)
+ my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
- # we haven't already set it
- !$alignment_type
+ $self->set_comma_breakpoints_do(
+ depth => $dd,
+ i_opening_paren => $opening_structure_index_stack[$dd],
+ i_closing_paren => $i,
+ item_count => $item_count_stack[$dd],
+ identifier_count => $identifier_count_stack[$dd],
+ rcomma_index => $comma_index[$dd],
+ next_nonblank_type => $next_nonblank_type,
+ list_type => $container_type[$dd],
+ interrupted => $interrupted_list[$dd],
+ rdo_not_break_apart => \$do_not_break_apart,
+ must_break_open => $must_break_open,
+ has_broken_sublist => $has_broken_sublist[$dd],
+ );
+ $bp_count = get_forced_breakpoint_count() - $fbc;
+ $do_not_break_apart = 0 if $must_break_open;
+ }
+ }
+ return ( $bp_count, $do_not_break_apart );
+ }
- # and its not the first token of the line
- && ( $i > $ibeg )
+ sub do_uncontained_comma_breaks {
- # and it follows a blank
- && $types_to_go[ $i - 1 ] eq 'b'
+ # Handle commas not in containers...
+ # This is a catch-all routine for commas that we
+ # don't know what to do with because the don't fall
+ # within containers. We will bias the bond strength
+ # to break at commas which ended lines in the input
+ # file. This usually works better than just trying
+ # to put as many items on a line as possible. A
+ # downside is that if the input file is garbage it
+ # won't work very well. However, the user can always
+ # prevent following the old breakpoints with the
+ # -iob flag.
+ my ( $self, $dd ) = @_;
+ my $bias = -.01;
+ my $old_comma_break_count = 0;
+ foreach my $ii ( @{ $comma_index[$dd] } ) {
+ if ( $old_breakpoint_to_go[$ii] ) {
+ $old_comma_break_count++;
+ $bond_strength_to_go[$ii] = $bias;
- # and previous token IS one of these:
- && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
+ # reduce bias magnitude to force breaks in order
+ $bias *= 0.99;
+ }
+ }
- # and it's NOT one of these
- && ( $type !~ /^[b\#\)\]\}]$/ )
+ # Also put a break before the first comma if
+ # (1) there was a break there in the input, and
+ # (2) there was exactly one old break before the first comma break
+ # (3) OLD: there are multiple old comma breaks
+ # (3) NEW: there are one or more old comma breaks (see return example)
+ #
+ # For example, we will follow the user and break after
+ # 'print' in this snippet:
+ # print
+ # "conformability (Not the same dimension)\n",
+ # "\t", $have, " is ", text_unit($hu), "\n",
+ # "\t", $want, " is ", text_unit($wu), "\n",
+ # ;
+ #
+ # Another example, just one comma, where we will break after
+ # the return:
+ # return
+ # $x * cos($a) - $y * sin($a),
+ # $x * sin($a) + $y * cos($a);
- # then go ahead and align
- )
+ # Breaking a print statement:
+ # print SAVEOUT
+ # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
+ # ( $? & 128 ) ? " -- core dumped" : "", "\n";
+ #
+ # But we will not force a break after the opening paren here
+ # (causes a blinker):
+ # $heap->{stream}->set_output_filter(
+ # poe::filter::reference->new('myotherfreezer') ),
+ # ;
+ #
+ my $i_first_comma = $comma_index[$dd]->[0];
+ if ( $old_breakpoint_to_go[$i_first_comma] ) {
+ my $level_comma = $levels_to_go[$i_first_comma];
+ my $ibreak = -1;
+ my $obp_count = 0;
+ for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
+ if ( $old_breakpoint_to_go[$ii] ) {
+ $obp_count++;
+ last if ( $obp_count > 1 );
+ $ibreak = $ii
+ if ( $levels_to_go[$ii] == $level_comma );
+ }
+ }
+ # Changed rule from multiple old commas to just one here:
+ if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
+ {
+ # Do not to break before an opening token because
+ # it can lead to "blinkers".
+ my $ibreakm = $ibreak;
+ $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
+ if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ )
{
- $alignment_type = $vert_last_nonblank_type;
+ $self->set_forced_breakpoint($ibreak);
}
+ }
+ }
+ return;
+ }
- #--------------------------------------------------------
- # Undo alignment in special cases
- #--------------------------------------------------------
- if ($alignment_type) {
+ my %is_logical_container;
- # do not align the opening brace of an anonymous sub
- if ( $token eq '{' && $block_type =~ /$ASUB_PATTERN/ ) {
- $alignment_type = "";
+ BEGIN {
+ my @q = qw# if elsif unless while and or err not && | || ? : ! #;
+ @is_logical_container{@q} = (1) x scalar(@q);
+ }
+
+ sub set_for_semicolon_breakpoints {
+ my ( $self, $dd ) = @_;
+ foreach ( @{ $rfor_semicolon_list[$dd] } ) {
+ $self->set_forced_breakpoint($_);
+ }
+ return;
+ }
+
+ sub set_logical_breakpoints {
+ my ( $self, $dd ) = @_;
+ if (
+ $item_count_stack[$dd] == 0
+ && $is_logical_container{ $container_type[$dd] }
+
+ || $has_old_logical_breakpoints[$dd]
+ )
+ {
+
+ # Look for breaks in this order:
+ # 0 1 2 3
+ # or and || &&
+ foreach my $i ( 0 .. 3 ) {
+ if ( $rand_or_list[$dd][$i] ) {
+ foreach ( @{ $rand_or_list[$dd][$i] } ) {
+ $self->set_forced_breakpoint($_);
}
- }
- #--------------------------------------------------------
- # then store the value
- #--------------------------------------------------------
- $ralignment_type_to_go->[$i] = $alignment_type;
- if ( $type ne 'b' ) {
- $vert_last_nonblank_type = $type;
- $vert_last_nonblank_token = $token;
- $vert_last_nonblank_block_type = $block_type;
+ # break at any 'if' and 'unless' too
+ foreach ( @{ $rand_or_list[$dd][4] } ) {
+ $self->set_forced_breakpoint($_);
+ }
+ $rand_or_list[$dd] = [];
+ last;
}
}
}
- return $ralignment_type_to_go;
+ return;
}
-} ## end closure set_vertical_alignment_markers
-{ ## begin closure terminal_type_i
+ sub is_unbreakable_container {
+
+ # never break a container of one of these types
+ # because bad things can happen (map1.t)
+ my $dd = shift;
+ return $is_sort_map_grep{ $container_type[$dd] };
+ }
+
+ sub scan_list {
+
+ my ($self) = @_;
+
+ # This routine is responsible for setting line breaks for all lists,
+ # so that hierarchical structure can be displayed and so that list
+ # items can be vertically aligned. The output of this routine is
+ # stored in the array @forced_breakpoint_to_go, which is used to set
+ # final breakpoints.
+
+ # It is called once per batch if the batch is a list.
+ my $rOpts_break_at_old_attribute_breakpoints =
+ $rOpts->{'break-at-old-attribute-breakpoints'};
+ my $rOpts_break_at_old_comma_breakpoints =
+ $rOpts->{'break-at-old-comma-breakpoints'};
+ my $rOpts_break_at_old_keyword_breakpoints =
+ $rOpts->{'break-at-old-keyword-breakpoints'};
+ my $rOpts_break_at_old_logical_breakpoints =
+ $rOpts->{'break-at-old-logical-breakpoints'};
+ my $rOpts_break_at_old_method_breakpoints =
+ $rOpts->{'break-at-old-method-breakpoints'};
+ my $rOpts_break_at_old_semicolon_breakpoints =
+ $rOpts->{'break-at-old-semicolon-breakpoints'};
+ my $rOpts_break_at_old_ternary_breakpoints =
+ $rOpts->{'break-at-old-ternary-breakpoints'};
+ my $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
+
+ $starting_depth = $nesting_depth_to_go[0];
- my %is_sort_map_grep_eval_do;
+ $block_type = ' ';
+ $current_depth = $starting_depth;
+ $i = -1;
+ $last_colon_sequence_number = -1;
+ $last_nonblank_token = ';';
+ $last_nonblank_type = ';';
+ $last_nonblank_block_type = ' ';
+ $last_old_breakpoint_count = 0;
+ $minimum_depth = $current_depth + 1; # forces update in check below
+ $old_breakpoint_count = 0;
+ $starting_breakpoint_count = get_forced_breakpoint_count();
+ $token = ';';
+ $type = ';';
+ $type_sequence = '';
- BEGIN {
- my @q = qw(sort map grep eval do);
- @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
- }
+ my $total_depth_variation = 0;
+ my $i_old_assignment_break;
+ my $depth_last = $starting_depth;
- sub terminal_type_i {
+ check_for_new_minimum_depth($current_depth);
- # returns type of last token on this line (terminal token), as follows:
- # returns # for a full-line comment
- # returns ' ' for a blank line
- # otherwise returns final token type
+ my $is_long_line = $self->excess_line_length( 0, $max_index_to_go ) > 0;
+ my $want_previous_breakpoint = -1;
- my ( $self, $ibeg, $iend ) = @_;
+ my $saw_good_breakpoint;
+ my $i_line_end = -1;
+ my $i_line_start = -1;
- # Start at the end and work backwards
- my $i = $iend;
- my $type_i = $types_to_go[$i];
+ # loop over all tokens in this batch
+ while ( ++$i <= $max_index_to_go ) {
+ if ( $type ne 'b' ) {
+ $i_last_nonblank_token = $i - 1;
+ $last_nonblank_type = $type;
+ $last_nonblank_token = $token;
+ $last_nonblank_block_type = $block_type;
+ } ## end if ( $type ne 'b' )
+ $type = $types_to_go[$i];
+ $block_type = $block_type_to_go[$i];
+ $token = $tokens_to_go[$i];
+ $type_sequence = $type_sequence_to_go[$i];
+ my $next_type = $types_to_go[ $i + 1 ];
+ my $next_token = $tokens_to_go[ $i + 1 ];
+ my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
+ $next_nonblank_type = $types_to_go[$i_next_nonblank];
+ $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+ $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
- # Check for side comment
- if ( $type_i eq '#' ) {
- $i--;
- if ( $i < $ibeg ) {
- return wantarray ? ( $type_i, $ibeg ) : $type_i;
+ # set break if flag was set
+ if ( $want_previous_breakpoint >= 0 ) {
+ $self->set_forced_breakpoint($want_previous_breakpoint);
+ $want_previous_breakpoint = -1;
}
- $type_i = $types_to_go[$i];
- }
- # Skip past a blank
- if ( $type_i eq 'b' ) {
- $i--;
- if ( $i < $ibeg ) {
- return wantarray ? ( $type_i, $ibeg ) : $type_i;
- }
- $type_i = $types_to_go[$i];
- }
+ $last_old_breakpoint_count = $old_breakpoint_count;
+ if ( $old_breakpoint_to_go[$i] ) {
+ $i_line_end = $i;
+ $i_line_start = $i_next_nonblank;
- # Found it..make sure it is a BLOCK termination,
- # but hide a terminal } after sort/grep/map because it is not
- # necessarily the end of the line. (terminal.t)
- my $block_type = $block_type_to_go[$i];
- if (
- $type_i eq '}'
- && ( !$block_type
- || ( $is_sort_map_grep_eval_do{$block_type} ) )
- )
- {
- $type_i = 'b';
- }
- return wantarray ? ( $type_i, $i ) : $type_i;
- }
+ $old_breakpoint_count++;
-} ## end closure terminal_type_i
+ # Break before certain keywords if user broke there and
+ # this is a 'safe' break point. The idea is to retain
+ # any preferred breaks for sequential list operations,
+ # like a schwartzian transform.
+ if ($rOpts_break_at_old_keyword_breakpoints) {
+ if (
+ $next_nonblank_type eq 'k'
+ && $is_keyword_returning_list{$next_nonblank_token}
+ && ( $type =~ /^[=\)\]\}Riw]$/
+ || $type eq 'k'
+ && $is_keyword_returning_list{$token} )
+ )
+ {
-{ ## begin closure set_bond_strengths
+ # we actually have to set this break next time through
+ # the loop because if we are at a closing token (such
+ # as '}') which forms a one-line block, this break might
+ # get undone.
+ $want_previous_breakpoint = $i;
+ } ## end if ( $next_nonblank_type...)
+ } ## end if ($rOpts_break_at_old_keyword_breakpoints)
- # These routines and variables are involved in deciding where to break very
- # long lines.
+ # Break before attributes if user broke there
+ if ($rOpts_break_at_old_attribute_breakpoints) {
+ if ( $next_nonblank_type eq 'A' ) {
+ $want_previous_breakpoint = $i;
+ }
+ }
- my %is_good_keyword_breakpoint;
- my %is_lt_gt_le_ge;
+ # remember an = break as possible good break point
+ if ( $is_assignment{$type} ) {
+ $i_old_assignment_break = $i;
+ }
+ elsif ( $is_assignment{$next_nonblank_type} ) {
+ $i_old_assignment_break = $i_next_nonblank;
+ }
+ } ## end if ( $old_breakpoint_to_go...)
- my %binary_bond_strength;
- my %nobreak_lhs;
- my %nobreak_rhs;
+ next if ( $type eq 'b' );
+ $depth = $nesting_depth_to_go[ $i + 1 ];
- my @bias_tokens;
- my $delta_bias;
+ $total_depth_variation += abs( $depth - $depth_last );
+ $depth_last = $depth;
- sub bias_table_key {
- my ( $type, $token ) = @_;
- my $bias_table_key = $type;
- if ( $type eq 'k' ) {
- $bias_table_key = $token;
- if ( $token eq 'err' ) { $bias_table_key = 'or' }
- }
- return $bias_table_key;
- }
+ # safety check - be sure we always break after a comment
+ # Shouldn't happen .. an error here probably means that the
+ # nobreak flag did not get turned off correctly during
+ # formatting.
+ if ( $type eq '#' ) {
+ if ( $i != $max_index_to_go ) {
+ warning(
+"Non-fatal program bug: backup logic needed to break after a comment\n"
+ );
+ report_definite_bug();
+ $nobreak_to_go[$i] = 0;
+ $self->set_forced_breakpoint($i);
+ } ## end if ( $i != $max_index_to_go)
+ } ## end if ( $type eq '#' )
- sub initialize_bond_strength_hashes {
+ # Force breakpoints at certain tokens in long lines.
+ # Note that such breakpoints will be undone later if these tokens
+ # are fully contained within parens on a line.
+ if (
- my @q;
- @q = qw(if unless while until for foreach);
- @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
+ # break before a keyword within a line
+ $type eq 'k'
+ && $i > 0
- @q = qw(lt gt le ge);
- @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
- #
- # The decision about where to break a line depends upon a "bond
- # strength" between tokens. The LOWER the bond strength, the MORE
- # likely a break. A bond strength may be any value but to simplify
- # things there are several pre-defined strength levels:
+ # if one of these keywords:
+ && $token =~ /^(if|unless|while|until|for)$/
- # NO_BREAK => 10000;
- # VERY_STRONG => 100;
- # STRONG => 2.1;
- # NOMINAL => 1.1;
- # WEAK => 0.8;
- # VERY_WEAK => 0.55;
+ # but do not break at something like '1 while'
+ && ( $last_nonblank_type ne 'n' || $i > 2 )
- # The strength values are based on trial-and-error, and need to be
- # tweaked occasionally to get desired results. Some comments:
- #
- # 1. Only relative strengths are important. small differences
- # in strengths can make big formatting differences.
- # 2. Each indentation level adds one unit of bond strength.
- # 3. A value of NO_BREAK makes an unbreakable bond
- # 4. A value of VERY_WEAK is the strength of a ','
- # 5. Values below NOMINAL are considered ok break points.
- # 6. Values above NOMINAL are considered poor break points.
- #
- # The bond strengths should roughly follow precedence order where
- # possible. If you make changes, please check the results very
- # carefully on a variety of scripts. Testing with the -extrude
- # options is particularly helpful in exercising all of the rules.
+ # and let keywords follow a closing 'do' brace
+ && $last_nonblank_block_type ne 'do'
- # Wherever possible, bond strengths are defined in the following
- # tables. There are two main stages to setting bond strengths and
- # two types of tables:
- #
- # The first stage involves looking at each token individually and
- # defining left and right bond strengths, according to if we want
- # to break to the left or right side, and how good a break point it
- # is. For example tokens like =, ||, && make good break points and
- # will have low strengths, but one might want to break on either
- # side to put them at the end of one line or beginning of the next.
- #
- # The second stage involves looking at certain pairs of tokens and
- # defining a bond strength for that particular pair. This second
- # stage has priority.
+ && (
+ $is_long_line
- #---------------------------------------------------------------
- # Bond Strength BEGIN Section 1.
- # Set left and right bond strengths of individual tokens.
- #---------------------------------------------------------------
+ # or container is broken (by side-comment, etc)
+ || ( $next_nonblank_token eq '('
+ && $mate_index_to_go[$i_next_nonblank] < $i )
+ )
+ )
+ {
+ $self->set_forced_breakpoint( $i - 1 );
+ } ## end if ( $type eq 'k' && $i...)
- # NOTE: NO_BREAK's set in this section first are HINTS which will
- # probably not be honored. Essential NO_BREAKS's should be set in
- # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
- # of this subroutine.
+ # remember locations of -> if this is a pre-broken method chain
+ if ( $type eq '->' ) {
+ if ($rOpts_break_at_old_method_breakpoints) {
- # Note that we are setting defaults in this section. The user
- # cannot change bond strengths but can cause the left and right
- # bond strengths of any token type to be swapped through the use of
- # the -wba and -wbb flags. In this way the user can determine if a
- # breakpoint token should appear at the end of one line or the
- # beginning of the next line.
+ # Case 1: look for lines with leading pointers
+ if ( $i == $i_line_start ) {
+ $self->set_forced_breakpoint( $i - 1 );
+ }
- %right_bond_strength = ();
- %left_bond_strength = ();
- %binary_bond_strength = ();
- %nobreak_lhs = ();
- %nobreak_rhs = ();
+ # Case 2: look for cuddled pointer calls
+ else {
- # The hash keys in this section are token types, plus the text of
- # certain keywords like 'or', 'and'.
+ # look for old lines with leading ')->' or ') ->'
+ # and, when found, force a break before the
+ # opening paren and after the previous closing paren.
+ if (
+ $types_to_go[$i_line_start] eq '}'
+ && ( $i == $i_line_start + 1
+ || $i == $i_line_start + 2
+ && $types_to_go[ $i - 1 ] eq 'b' )
+ )
+ {
+ $self->set_forced_breakpoint( $i_line_start - 1 );
+ $self->set_forced_breakpoint(
+ $mate_index_to_go[$i_line_start] );
+ }
+ }
+ }
+ } ## end if ( $type eq '->' )
- # no break around possible filehandle
- $left_bond_strength{'Z'} = NO_BREAK;
- $right_bond_strength{'Z'} = NO_BREAK;
+ elsif ( $type eq ';' ) {
+ if ( $i == $i_line_start
+ && $rOpts_break_at_old_semicolon_breakpoints )
+ {
+ $self->set_forced_breakpoint( $i - 1 );
+ }
+ }
- # never put a bare word on a new line:
- # example print (STDERR, "bla"); will fail with break after (
- $left_bond_strength{'w'} = NO_BREAK;
+ # remember locations of '||' and '&&' for possible breaks if we
+ # decide this is a long logical expression.
+ elsif ( $type eq '||' ) {
+ push @{ $rand_or_list[$depth][2] }, $i;
+ ++$has_old_logical_breakpoints[$depth]
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints );
+ } ## end elsif ( $type eq '||' )
+ elsif ( $type eq '&&' ) {
+ push @{ $rand_or_list[$depth][3] }, $i;
+ ++$has_old_logical_breakpoints[$depth]
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints );
+ } ## end elsif ( $type eq '&&' )
+ elsif ( $type eq 'f' ) {
+ push @{ $rfor_semicolon_list[$depth] }, $i;
+ }
+ elsif ( $type eq 'k' ) {
+ if ( $token eq 'and' ) {
+ push @{ $rand_or_list[$depth][1] }, $i;
+ ++$has_old_logical_breakpoints[$depth]
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints );
+ } ## end if ( $token eq 'and' )
- # blanks always have infinite strength to force breaks after
- # real tokens
- $right_bond_strength{'b'} = NO_BREAK;
+ # break immediately at 'or's which are probably not in a logical
+ # block -- but we will break in logical breaks below so that
+ # they do not add to the forced_breakpoint_count
+ elsif ( $token eq 'or' ) {
+ push @{ $rand_or_list[$depth][0] }, $i;
+ ++$has_old_logical_breakpoints[$depth]
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints );
+ if ( $is_logical_container{ $container_type[$depth] } ) {
+ }
+ else {
+ if ($is_long_line) { $self->set_forced_breakpoint($i) }
+ elsif ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints )
+ {
+ $saw_good_breakpoint = 1;
+ }
+ } ## end else [ if ( $is_logical_container...)]
+ } ## end elsif ( $token eq 'or' )
+ elsif ( $token eq 'if' || $token eq 'unless' ) {
+ push @{ $rand_or_list[$depth][4] }, $i;
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints )
+ {
+ $self->set_forced_breakpoint($i);
+ }
+ } ## end elsif ( $token eq 'if' ||...)
+ } ## end elsif ( $type eq 'k' )
+ elsif ( $is_assignment{$type} ) {
+ $i_equals[$depth] = $i;
+ }
- # try not to break on exponentation
- @q = qw# ** .. ... <=> #;
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} = (STRONG) x scalar(@q);
+ if ($type_sequence) {
- # The comma-arrow has very low precedence but not a good break point
- $left_bond_strength{'=>'} = NO_BREAK;
- $right_bond_strength{'=>'} = NOMINAL;
+ # handle any postponed closing breakpoints
+ if ( $token =~ /^[\)\]\}\:]$/ ) {
+ if ( $type eq ':' ) {
+ $last_colon_sequence_number = $type_sequence;
- # ok to break after label
- $left_bond_strength{'J'} = NO_BREAK;
- $right_bond_strength{'J'} = NOMINAL;
- $left_bond_strength{'j'} = STRONG;
- $right_bond_strength{'j'} = STRONG;
- $left_bond_strength{'A'} = STRONG;
- $right_bond_strength{'A'} = STRONG;
+ # retain break at a ':' line break
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_ternary_breakpoints )
+ {
- $left_bond_strength{'->'} = STRONG;
- $right_bond_strength{'->'} = VERY_STRONG;
+ $self->set_forced_breakpoint($i);
- $left_bond_strength{'CORE::'} = NOMINAL;
- $right_bond_strength{'CORE::'} = NO_BREAK;
+ # break at previous '='
+ if ( $i_equals[$depth] > 0 ) {
+ $self->set_forced_breakpoint(
+ $i_equals[$depth] );
+ $i_equals[$depth] = -1;
+ }
+ } ## end if ( ( $i == $i_line_start...))
+ } ## end if ( $type eq ':' )
+ if ( has_postponed_breakpoint($type_sequence) ) {
+ my $inc = ( $type eq ':' ) ? 0 : 1;
+ $self->set_forced_breakpoint( $i - $inc );
+ }
+ } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(])
- # breaking AFTER modulus operator is ok:
- @q = qw< % >;
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} =
- ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
+ # set breaks at ?/: if they will get separated (and are
+ # not a ?/: chain), or if the '?' is at the end of the
+ # line
+ elsif ( $token eq '?' ) {
+ my $i_colon = $mate_index_to_go[$i];
+ if (
+ $i_colon <= 0 # the ':' is not in this batch
+ || $i == 0 # this '?' is the first token of the line
+ || $i ==
+ $max_index_to_go # or this '?' is the last token
+ )
+ {
- # Break AFTER math operators * and /
- @q = qw< * / x >;
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
+ # don't break at a '?' if preceded by ':' on
+ # this line of previous ?/: pair on this line.
+ # This is an attempt to preserve a chain of ?/:
+ # expressions (elsif2.t). And don't break if
+ # this has a side comment.
+ $self->set_forced_breakpoint($i)
+ unless (
+ $type_sequence == (
+ $last_colon_sequence_number +
+ TYPE_SEQUENCE_INCREMENT
+ )
+ || $tokens_to_go[$max_index_to_go] eq '#'
+ );
+ $self->set_closing_breakpoint($i);
+ } ## end if ( $i_colon <= 0 ||...)
+ } ## end elsif ( $token eq '?' )
+ } ## end if ($type_sequence)
- # Break AFTER weakest math operators + and -
- # Make them weaker than * but a bit stronger than '.'
- @q = qw< + - >;
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} =
- ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
+#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
- # breaking BEFORE these is just ok:
- @q = qw# >> << #;
- @right_bond_strength{@q} = (STRONG) x scalar(@q);
- @left_bond_strength{@q} = (NOMINAL) x scalar(@q);
+ #------------------------------------------------------------
+ # Handle Increasing Depth..
+ #
+ # prepare for a new list when depth increases
+ # token $i is a '(','{', or '['
+ #------------------------------------------------------------
+ if ( $depth > $current_depth ) {
- # breaking before the string concatenation operator seems best
- # because it can be hard to see at the end of a line
- $right_bond_strength{'.'} = STRONG;
- $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
+ $breakpoint_stack[$depth] = get_forced_breakpoint_count();
+ $breakpoint_undo_stack[$depth] =
+ get_forced_breakpoint_undo_count();
+ $has_broken_sublist[$depth] = 0;
+ $identifier_count_stack[$depth] = 0;
+ $index_before_arrow[$depth] = -1;
+ $interrupted_list[$depth] = 0;
+ $item_count_stack[$depth] = 0;
+ $last_comma_index[$depth] = undef;
+ $last_dot_index[$depth] = undef;
+ $last_nonblank_type[$depth] = $last_nonblank_type;
+ $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
+ $opening_structure_index_stack[$depth] = $i;
+ $rand_or_list[$depth] = [];
+ $rfor_semicolon_list[$depth] = [];
+ $i_equals[$depth] = -1;
+ $want_comma_break[$depth] = 0;
+ $container_type[$depth] =
+ ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
+ ? $last_nonblank_token
+ : "";
+ $has_old_logical_breakpoints[$depth] = 0;
- @q = qw< } ] ) R >;
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
+ # if line ends here then signal closing token to break
+ if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
+ {
+ $self->set_closing_breakpoint($i);
+ }
- # make these a little weaker than nominal so that they get
- # favored for end-of-line characters
- @q = qw< != == =~ !~ ~~ !~~ >;
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} =
- ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
+ # Not all lists of values should be vertically aligned..
+ $dont_align[$depth] =
- # break AFTER these
- @q = qw# < > | & >= <= #;
- @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
- @right_bond_strength{@q} =
- ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
+ # code BLOCKS are handled at a higher level
+ ( $block_type ne "" )
- # breaking either before or after a quote is ok
- # but bias for breaking before a quote
- $left_bond_strength{'Q'} = NOMINAL;
- $right_bond_strength{'Q'} = NOMINAL + 0.02;
- $left_bond_strength{'q'} = NOMINAL;
- $right_bond_strength{'q'} = NOMINAL;
+ # certain paren lists
+ || ( $type eq '(' ) && (
+
+ # it does not usually look good to align a list of
+ # identifiers in a parameter list, as in:
+ # my($var1, $var2, ...)
+ # (This test should probably be refined, for now I'm just
+ # testing for any keyword)
+ ( $last_nonblank_type eq 'k' )
- # starting a line with a keyword is usually ok
- $left_bond_strength{'k'} = NOMINAL;
+ # a trailing '(' usually indicates a non-list
+ || ( $next_nonblank_type eq '(' )
+ );
- # we usually want to bond a keyword strongly to what immediately
- # follows, rather than leaving it stranded at the end of a line
- $right_bond_strength{'k'} = STRONG;
+ # patch to outdent opening brace of long if/for/..
+ # statements (like this one). See similar coding in
+ # set_continuation breaks. We have also catch it here for
+ # short line fragments which otherwise will not go through
+ # set_continuation_breaks.
+ if (
+ $block_type
- $left_bond_strength{'G'} = NOMINAL;
- $right_bond_strength{'G'} = STRONG;
+ # if we have the ')' but not its '(' in this batch..
+ && ( $last_nonblank_token eq ')' )
+ && $mate_index_to_go[$i_last_nonblank_token] < 0
- # assignment operators
- @q = qw(
- = **= += *= &= <<= &&=
- -= /= |= >>= ||= //=
- .= %= ^=
- x=
- );
+ # and user wants brace to left
+ && !$rOpts->{'opening-brace-always-on-right'}
- # Default is to break AFTER various assignment operators
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} =
- ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
+ && ( $type eq '{' ) # should be true
+ && ( $token eq '{' ) # should be true
+ )
+ {
+ $self->set_forced_breakpoint( $i - 1 );
+ } ## end if ( $block_type && ( ...))
+ } ## end if ( $depth > $current_depth)
- # Default is to break BEFORE '&&' and '||' and '//'
- # set strength of '||' to same as '=' so that chains like
- # $a = $b || $c || $d will break before the first '||'
- $right_bond_strength{'||'} = NOMINAL;
- $left_bond_strength{'||'} = $right_bond_strength{'='};
+ #------------------------------------------------------------
+ # Handle Decreasing Depth..
+ #
+ # finish off any old list when depth decreases
+ # token $i is a ')','}', or ']'
+ #------------------------------------------------------------
+ elsif ( $depth < $current_depth ) {
- # same thing for '//'
- $right_bond_strength{'//'} = NOMINAL;
- $left_bond_strength{'//'} = $right_bond_strength{'='};
+ check_for_new_minimum_depth($depth);
- # set strength of && a little higher than ||
- $right_bond_strength{'&&'} = NOMINAL;
- $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
+ # force all outer logical containers to break after we see on
+ # old breakpoint
+ $has_old_logical_breakpoints[$depth] ||=
+ $has_old_logical_breakpoints[$current_depth];
- $left_bond_strength{';'} = VERY_STRONG;
- $right_bond_strength{';'} = VERY_WEAK;
- $left_bond_strength{'f'} = VERY_STRONG;
+ # Patch to break between ') {' if the paren list is broken.
+ # There is similar logic in set_continuation_breaks for
+ # non-broken lists.
+ if ( $token eq ')'
+ && $next_nonblank_block_type
+ && $interrupted_list[$current_depth]
+ && $next_nonblank_type eq '{'
+ && !$rOpts->{'opening-brace-always-on-right'} )
+ {
+ $self->set_forced_breakpoint($i);
+ } ## end if ( $token eq ')' && ...
- # make right strength of for ';' a little less than '='
- # to make for contents break after the ';' to avoid this:
- # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
- # $number_of_fields )
- # and make it weaker than ',' and 'and' too
- $right_bond_strength{'f'} = VERY_WEAK - 0.03;
+#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
- # The strengths of ?/: should be somewhere between
- # an '=' and a quote (NOMINAL),
- # make strength of ':' slightly less than '?' to help
- # break long chains of ? : after the colons
- $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
- $right_bond_strength{':'} = NO_BREAK;
- $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
- $right_bond_strength{'?'} = NO_BREAK;
+ # set breaks at commas if necessary
+ my ( $bp_count, $do_not_break_apart ) =
+ $self->set_comma_breakpoints($current_depth);
- $left_bond_strength{','} = VERY_STRONG;
- $right_bond_strength{','} = VERY_WEAK;
+ my $i_opening = $opening_structure_index_stack[$current_depth];
+ my $saw_opening_structure = ( $i_opening >= 0 );
- # remaining digraphs and trigraphs not defined above
- @q = qw( :: <> ++ --);
- @left_bond_strength{@q} = (WEAK) x scalar(@q);
- @right_bond_strength{@q} = (STRONG) x scalar(@q);
+ # this term is long if we had to break at interior commas..
+ my $is_long_term = $bp_count > 0;
- # Set bond strengths of certain keywords
- # make 'or', 'err', 'and' slightly weaker than a ','
- $left_bond_strength{'and'} = VERY_WEAK - 0.01;
- $left_bond_strength{'or'} = VERY_WEAK - 0.02;
- $left_bond_strength{'err'} = VERY_WEAK - 0.02;
- $left_bond_strength{'xor'} = VERY_WEAK - 0.01;
- $right_bond_strength{'and'} = NOMINAL;
- $right_bond_strength{'or'} = NOMINAL;
- $right_bond_strength{'err'} = NOMINAL;
- $right_bond_strength{'xor'} = NOMINAL;
+ # If this is a short container with one or more comma arrows,
+ # then we will mark it as a long term to open it if requested.
+ # $rOpts_comma_arrow_breakpoints =
+ # 0 - open only if comma precedes closing brace
+ # 1 - stable: except for one line blocks
+ # 2 - try to form 1 line blocks
+ # 3 - ignore =>
+ # 4 - always open up if vt=0
+ # 5 - stable: even for one line blocks if vt=0
+ if ( !$is_long_term
+ && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/
+ && $index_before_arrow[ $depth + 1 ] > 0
+ && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
+ )
+ {
+ $is_long_term = $rOpts_comma_arrow_breakpoints == 4
+ || ( $rOpts_comma_arrow_breakpoints == 0
+ && $last_nonblank_token eq ',' )
+ || ( $rOpts_comma_arrow_breakpoints == 5
+ && $old_breakpoint_to_go[$i_opening] );
+ } ## end if ( !$is_long_term &&...)
- #---------------------------------------------------------------
- # Bond Strength BEGIN Section 2.
- # Set binary rules for bond strengths between certain token types.
- #---------------------------------------------------------------
+ # mark term as long if the length between opening and closing
+ # parens exceeds allowed line length
+ if ( !$is_long_term && $saw_opening_structure ) {
+ my $i_opening_minus =
+ $self->find_token_starting_list($i_opening);
- # We have a little problem making tables which apply to the
- # container tokens. Here is a list of container tokens and
- # their types:
- #
- # type tokens // meaning
- # { {, [, ( // indent
- # } }, ], ) // outdent
- # [ [ // left non-structural [ (enclosing an array index)
- # ] ] // right non-structural square bracket
- # ( ( // left non-structural paren
- # ) ) // right non-structural paren
- # L { // left non-structural curly brace (enclosing a key)
- # R } // right non-structural curly brace
- #
- # Some rules apply to token types and some to just the token
- # itself. We solve the problem by combining type and token into a
- # new hash key for the container types.
- #
- # If a rule applies to a token 'type' then we need to make rules
- # for each of these 'type.token' combinations:
- # Type Type.Token
- # { {{, {[, {(
- # [ [[
- # ( ((
- # L L{
- # } }}, }], })
- # ] ]]
- # ) ))
- # R R}
- #
- # If a rule applies to a token then we need to make rules for
- # these 'type.token' combinations:
- # Token Type.Token
- # { {{, L{
- # [ {[, [[
- # ( {(, ((
- # } }}, R}
- # ] }], ]]
- # ) }), ))
+ # Note: we have to allow for one extra space after a
+ # closing token so that we do not strand a comma or
+ # semicolon, hence the '>=' here (oneline.t)
+ # Note: we ignore left weld lengths here for best results
+ $is_long_term =
+ $self->excess_line_length( $i_opening_minus, $i, 1 ) >= 0;
+ } ## end if ( !$is_long_term &&...)
- # allow long lines before final { in an if statement, as in:
- # if (..........
- # ..........)
- # {
- #
- # Otherwise, the line before the { tends to be too short.
+ # We've set breaks after all comma-arrows. Now we have to
+ # undo them if this can be a one-line block
+ # (the only breakpoints set will be due to comma-arrows)
+ if (
- $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
- $binary_bond_strength{'(('}{'{{'} = NOMINAL;
+ # user doesn't require breaking after all comma-arrows
+ ( $rOpts_comma_arrow_breakpoints != 0 )
+ && ( $rOpts_comma_arrow_breakpoints != 4 )
- # break on something like '} (', but keep this stronger than a ','
- # example is in 'howe.pl'
- $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
- $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
+ # and if the opening structure is in this batch
+ && $saw_opening_structure
- # keep matrix and hash indices together
- # but make them a little below STRONG to allow breaking open
- # something like {'some-word'}{'some-very-long-word'} at the }{
- # (bracebrk.t)
- $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
- $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
- $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
- $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
+ # and either on the same old line
+ && (
+ $old_breakpoint_count_stack[$current_depth] ==
+ $last_old_breakpoint_count
- # increase strength to the point where a break in the following
- # will be after the opening paren rather than at the arrow:
- # $a->$b($c);
- $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
+ # or user wants to form long blocks with arrows
+ || $rOpts_comma_arrow_breakpoints == 2
+ )
- # Note that the following alternative strength would make the break at the
- # '->' rather than opening the '('. Both have advantages and disadvantages.
- # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
+ # and we made some breakpoints between the opening and closing
+ && ( $breakpoint_undo_stack[$current_depth] <
+ get_forced_breakpoint_undo_count() )
- $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ # and this block is short enough to fit on one line
+ # Note: use < because need 1 more space for possible comma
+ && !$is_long_term
- $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
- $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
- $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
- $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
+ )
+ {
+ $self->undo_forced_breakpoint_stack(
+ $breakpoint_undo_stack[$current_depth] );
+ } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
- #---------------------------------------------------------------
- # Binary NO_BREAK rules
- #---------------------------------------------------------------
+ # now see if we have any comma breakpoints left
+ my $has_comma_breakpoints =
+ ( $breakpoint_stack[$current_depth] !=
+ get_forced_breakpoint_count() );
- # use strict requires that bare word and => not be separated
- $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
- $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
+ # update broken-sublist flag of the outer container
+ $has_broken_sublist[$depth] =
+ $has_broken_sublist[$depth]
+ || $has_broken_sublist[$current_depth]
+ || $is_long_term
+ || $has_comma_breakpoints;
- # Never break between a bareword and a following paren because
- # perl may give an error. For example, if a break is placed
- # between 'to_filehandle' and its '(' the following line will
- # give a syntax error [Carp.pm]: my( $no) =fileno(
- # to_filehandle( $in)) ;
- $binary_bond_strength{'C'}{'(('} = NO_BREAK;
- $binary_bond_strength{'C'}{'{('} = NO_BREAK;
- $binary_bond_strength{'U'}{'(('} = NO_BREAK;
- $binary_bond_strength{'U'}{'{('} = NO_BREAK;
+# Having come to the closing ')', '}', or ']', now we have to decide if we
+# should 'open up' the structure by placing breaks at the opening and
+# closing containers. This is a tricky decision. Here are some of the
+# basic considerations:
+#
+# -If this is a BLOCK container, then any breakpoints will have already
+# been set (and according to user preferences), so we need do nothing here.
+#
+# -If we have a comma-separated list for which we can align the list items,
+# then we need to do so because otherwise the vertical aligner cannot
+# currently do the alignment.
+#
+# -If this container does itself contain a container which has been broken
+# open, then it should be broken open to properly show the structure.
+#
+# -If there is nothing to align, and no other reason to break apart,
+# then do not do it.
+#
+# We will not break open the parens of a long but 'simple' logical expression.
+# For example:
+#
+# This is an example of a simple logical expression and its formatting:
+#
+# if ( $bigwasteofspace1 && $bigwasteofspace2
+# || $bigwasteofspace3 && $bigwasteofspace4 )
+#
+# Most people would prefer this than the 'spacey' version:
+#
+# if (
+# $bigwasteofspace1 && $bigwasteofspace2
+# || $bigwasteofspace3 && $bigwasteofspace4
+# )
+#
+# To illustrate the rules for breaking logical expressions, consider:
+#
+# FULLY DENSE:
+# if ( $opt_excl
+# and ( exists $ids_excl_uc{$id_uc}
+# or grep $id_uc =~ /$_/, @ids_excl_uc ))
+#
+# This is on the verge of being difficult to read. The current default is to
+# open it up like this:
+#
+# DEFAULT:
+# if (
+# $opt_excl
+# and ( exists $ids_excl_uc{$id_uc}
+# or grep $id_uc =~ /$_/, @ids_excl_uc )
+# )
+#
+# This is a compromise which tries to avoid being too dense and to spacey.
+# A more spaced version would be:
+#
+# SPACEY:
+# if (
+# $opt_excl
+# and (
+# exists $ids_excl_uc{$id_uc}
+# or grep $id_uc =~ /$_/, @ids_excl_uc
+# )
+# )
+#
+# Some people might prefer the spacey version -- an option could be added. The
+# innermost expression contains a long block '( exists $ids_... ')'.
+#
+# Here is how the logic goes: We will force a break at the 'or' that the
+# innermost expression contains, but we will not break apart its opening and
+# closing containers because (1) it contains no multi-line sub-containers itself,
+# and (2) there is no alignment to be gained by breaking it open like this
+#
+# and (
+# exists $ids_excl_uc{$id_uc}
+# or grep $id_uc =~ /$_/, @ids_excl_uc
+# )
+#
+# (although this looks perfectly ok and might be good for long expressions). The
+# outer 'if' container, though, contains a broken sub-container, so it will be
+# broken open to avoid too much density. Also, since it contains no 'or's, there
+# will be a forced break at its 'and'.
- # use strict requires that bare word within braces not start new
- # line
- $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
+ # set some flags telling something about this container..
+ my $is_simple_logical_expression = 0;
+ if ( $item_count_stack[$current_depth] == 0
+ && $saw_opening_structure
+ && $tokens_to_go[$i_opening] eq '('
+ && $is_logical_container{ $container_type[$current_depth] }
+ )
+ {
- $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
+ # This seems to be a simple logical expression with
+ # no existing breakpoints. Set a flag to prevent
+ # opening it up.
+ if ( !$has_comma_breakpoints ) {
+ $is_simple_logical_expression = 1;
+ }
- # use strict requires that bare word and => not be separated
- $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
+ # This seems to be a simple logical expression with
+ # breakpoints (broken sublists, for example). Break
+ # at all 'or's and '||'s.
+ else {
+ $self->set_logical_breakpoints($current_depth);
+ }
+ } ## end if ( $item_count_stack...)
- # use strict does not allow separating type info from trailing { }
- # testfile is readmail.pl
- $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
- $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
+ if ( $is_long_term
+ && @{ $rfor_semicolon_list[$current_depth] } )
+ {
+ $self->set_for_semicolon_breakpoints($current_depth);
- # As a defensive measure, do not break between a '(' and a
- # filehandle. In some cases, this can cause an error. For
- # example, the following program works:
- # my $msg="hi!\n";
- # print
- # ( STDOUT
- # $msg
- # );
- #
- # But this program fails:
- # my $msg="hi!\n";
- # print
- # (
- # STDOUT
- # $msg
- # );
- #
- # This is normally only a problem with the 'extrude' option
- $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
- $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
+ # open up a long 'for' or 'foreach' container to allow
+ # leading term alignment unless -lp is used.
+ $has_comma_breakpoints = 1
+ unless $rOpts_line_up_parentheses;
+ } ## end if ( $is_long_term && ...)
- # never break between sub name and opening paren
- $binary_bond_strength{'w'}{'(('} = NO_BREAK;
- $binary_bond_strength{'w'}{'{('} = NO_BREAK;
+ if (
- # keep '}' together with ';'
- $binary_bond_strength{'}}'}{';'} = NO_BREAK;
+ # breaks for code BLOCKS are handled at a higher level
+ !$block_type
- # Breaking before a ++ can cause perl to guess wrong. For
- # example the following line will cause a syntax error
- # with -extrude if we break between '$i' and '++' [fixstyle2]
- # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
- $nobreak_lhs{'++'} = NO_BREAK;
+ # we do not need to break at the top level of an 'if'
+ # type expression
+ && !$is_simple_logical_expression
- # Do not break before a possible file handle
- $nobreak_lhs{'Z'} = NO_BREAK;
+ ## modification to keep ': (' containers vertically tight;
+ ## but probably better to let user set -vt=1 to avoid
+ ## inconsistency with other paren types
+ ## && ($container_type[$current_depth] ne ':')
- # use strict hates bare words on any new line. For
- # example, a break before the underscore here provokes the
- # wrath of use strict:
- # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
- $nobreak_rhs{'F'} = NO_BREAK;
- $nobreak_rhs{'CORE::'} = NO_BREAK;
+ # otherwise, we require one of these reasons for breaking:
+ && (
- #---------------------------------------------------------------
- # Bond Strength BEGIN Section 3.
- # Define tables and values for applying a small bias to the above
- # values.
- #---------------------------------------------------------------
- # Adding a small 'bias' to strengths is a simple way to make a line
- # break at the first of a sequence of identical terms. For
- # example, to force long string of conditional operators to break
- # with each line ending in a ':', we can add a small number to the
- # bond strength of each ':' (colon.t)
- @bias_tokens = qw( : && || f and or . ); # tokens which get bias
- $delta_bias = 0.0001; # a very small strength level
- return;
+ # - this term has forced line breaks
+ $has_comma_breakpoints
- } ## end sub initialize_bond_strength_hashes
+ # - the opening container is separated from this batch
+ # for some reason (comment, blank line, code block)
+ # - this is a non-paren container spanning multiple lines
+ || !$saw_opening_structure
- my $DEBUG_BOND;
+ # - this is a long block contained in another breakable
+ # container
+ || ( $is_long_term
+ && $container_environment_to_go[$i_opening] ne
+ 'BLOCK' )
+ )
+ )
+ {
- sub set_bond_strengths {
+ # For -lp option, we must put a breakpoint before
+ # the token which has been identified as starting
+ # this indentation level. This is necessary for
+ # proper alignment.
+ if ( $rOpts_line_up_parentheses && $saw_opening_structure )
+ {
+ my $item = $leading_spaces_to_go[ $i_opening + 1 ];
+ if ( $i_opening + 1 < $max_index_to_go
+ && $types_to_go[ $i_opening + 1 ] eq 'b' )
+ {
+ $item = $leading_spaces_to_go[ $i_opening + 2 ];
+ }
+ if ( defined($item) ) {
+ my $i_start_2;
+ my $K_start_2 = $item->get_starting_index_K();
+ if ( defined($K_start_2) ) {
+ $i_start_2 = $K_start_2 - $K_to_go[0];
+ }
+ if (
+ defined($i_start_2)
- my ($self) = @_;
+ # we are breaking after an opening brace, paren,
+ # so don't break before it too
+ && $i_start_2 ne $i_opening
+ && $i_start_2 >= 0
+ && $i_start_2 <= $max_index_to_go
+ )
+ {
+
+ # Only break for breakpoints at the same
+ # indentation level as the opening paren
+ my $test1 = $nesting_depth_to_go[$i_opening];
+ my $test2 = $nesting_depth_to_go[$i_start_2];
+ if ( $test2 == $test1 ) {
+ $self->set_forced_breakpoint(
+ $i_start_2 - 1 );
+ }
+ } ## end if ( defined($i_start_2...))
+ } ## end if ( defined($item) )
+ } ## end if ( $rOpts_line_up_parentheses...)
- # patch-its always ok to break at end of line
- $nobreak_to_go[$max_index_to_go] = 0;
+ # break after opening structure.
+ # note: break before closing structure will be automatic
+ if ( $minimum_depth <= $current_depth ) {
- my $rOpts_short_concatenation_item_length =
- $rOpts->{'short-concatenation-item-length'};
+ $self->set_forced_breakpoint($i_opening)
+ unless ( $do_not_break_apart
+ || is_unbreakable_container($current_depth) );
- # we start a new set of bias values for each line
- my %bias;
- @bias{@bias_tokens} = (0) x scalar(@bias_tokens);
- my $code_bias = -.01; # bias for closing block braces
+ # break at ',' of lower depth level before opening token
+ if ( $last_comma_index[$depth] ) {
+ $self->set_forced_breakpoint(
+ $last_comma_index[$depth] );
+ }
- my $type = 'b';
- my $token = ' ';
- my $token_length = 1;
- my $last_type;
- my $last_nonblank_type = $type;
- my $last_nonblank_token = $token;
- my $list_str = $left_bond_strength{'?'};
+ # break at '.' of lower depth level before opening token
+ if ( $last_dot_index[$depth] ) {
+ $self->set_forced_breakpoint(
+ $last_dot_index[$depth] );
+ }
- my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
- $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
- );
+ # break before opening structure if preceded by another
+ # closing structure and a comma. This is normally
+ # done by the previous closing brace, but not
+ # if it was a one-line block.
+ if ( $i_opening > 2 ) {
+ my $i_prev =
+ ( $types_to_go[ $i_opening - 1 ] eq 'b' )
+ ? $i_opening - 2
+ : $i_opening - 1;
- # main loop to compute bond strengths between each pair of tokens
- foreach my $i ( 0 .. $max_index_to_go ) {
- $last_type = $type;
- if ( $type ne 'b' ) {
- $last_nonblank_type = $type;
- $last_nonblank_token = $token;
- }
- $type = $types_to_go[$i];
+ if ( $types_to_go[$i_prev] eq ','
+ && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
+ {
+ $self->set_forced_breakpoint($i_prev);
+ }
- # strength on both sides of a blank is the same
- if ( $type eq 'b' && $last_type ne 'b' ) {
- $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
- next;
- }
+ # also break before something like ':(' or '?('
+ # if appropriate.
+ elsif (
+ $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
+ {
+ my $token_prev = $tokens_to_go[$i_prev];
+ if ( $want_break_before{$token_prev} ) {
+ $self->set_forced_breakpoint($i_prev);
+ }
+ } ## end elsif ( $types_to_go[$i_prev...])
+ } ## end if ( $i_opening > 2 )
+ } ## end if ( $minimum_depth <=...)
- $token = $tokens_to_go[$i];
- $token_length = $token_lengths_to_go[$i];
- $block_type = $block_type_to_go[$i];
- $i_next = $i + 1;
- $next_type = $types_to_go[$i_next];
- $next_token = $tokens_to_go[$i_next];
- $total_nesting_depth = $nesting_depth_to_go[$i_next];
- $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
- $next_nonblank_type = $types_to_go[$i_next_nonblank];
- $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+ # break after comma following closing structure
+ if ( $next_type eq ',' ) {
+ $self->set_forced_breakpoint( $i + 1 );
+ }
- # We are computing the strength of the bond between the current
- # token and the NEXT token.
+ # break before an '=' following closing structure
+ if (
+ $is_assignment{$next_nonblank_type}
+ && ( $breakpoint_stack[$current_depth] !=
+ get_forced_breakpoint_count() )
+ )
+ {
+ $self->set_forced_breakpoint($i);
+ } ## end if ( $is_assignment{$next_nonblank_type...})
- #---------------------------------------------------------------
- # Bond Strength Section 1:
- # First Approximation.
- # Use minimum of individual left and right tabulated bond
- # strengths.
- #---------------------------------------------------------------
- my $bsr = $right_bond_strength{$type};
- my $bsl = $left_bond_strength{$next_nonblank_type};
+ # break at any comma before the opening structure Added
+ # for -lp, but seems to be good in general. It isn't
+ # obvious how far back to look; the '5' below seems to
+ # work well and will catch the comma in something like
+ # push @list, myfunc( $param, $param, ..
- # define right bond strengths of certain keywords
- if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
- $bsr = $right_bond_strength{$token};
- }
- elsif ( $token eq 'ne' or $token eq 'eq' ) {
- $bsr = NOMINAL;
- }
+ my $icomma = $last_comma_index[$depth];
+ if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
+ unless ( $forced_breakpoint_to_go[$icomma] ) {
+ $self->set_forced_breakpoint($icomma);
+ }
+ }
+ } # end logic to open up a container
- # set terminal bond strength to the nominal value
- # this will cause good preceding breaks to be retained
- if ( $i_next_nonblank > $max_index_to_go ) {
- $bsl = NOMINAL;
- }
+ # Break open a logical container open if it was already open
+ elsif ($is_simple_logical_expression
+ && $has_old_logical_breakpoints[$current_depth] )
+ {
+ $self->set_logical_breakpoints($current_depth);
+ }
- # define right bond strengths of certain keywords
- if ( $next_nonblank_type eq 'k'
- && defined( $left_bond_strength{$next_nonblank_token} ) )
- {
- $bsl = $left_bond_strength{$next_nonblank_token};
- }
- elsif ($next_nonblank_token eq 'ne'
- or $next_nonblank_token eq 'eq' )
- {
- $bsl = NOMINAL;
- }
- elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
- $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
- }
+ # Handle long container which does not get opened up
+ elsif ($is_long_term) {
- # Use the minimum of the left and right strengths. Note: it might
- # seem that we would want to keep a NO_BREAK if either token has
- # this value. This didn't work, for example because in an arrow
- # list, it prevents the comma from separating from the following
- # bare word (which is probably quoted by its arrow). So necessary
- # NO_BREAK's have to be handled as special cases in the final
- # section.
- if ( !defined($bsr) ) { $bsr = VERY_STRONG }
- if ( !defined($bsl) ) { $bsl = VERY_STRONG }
- my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
- my $bond_str_1 = $bond_str;
+ # must set fake breakpoint to alert outer containers that
+ # they are complex
+ set_fake_breakpoint();
+ } ## end elsif ($is_long_term)
- #---------------------------------------------------------------
- # Bond Strength Section 2:
- # Apply hardwired rules..
- #---------------------------------------------------------------
+ } ## end elsif ( $depth < $current_depth)
- # Patch to put terminal or clauses on a new line: Weaken the bond
- # at an || followed by die or similar keyword to make the terminal
- # or clause fall on a new line, like this:
- #
- # my $class = shift
- # || die "Cannot add broadcast: No class identifier found";
- #
- # Otherwise the break will be at the previous '=' since the || and
- # = have the same starting strength and the or is biased, like
- # this:
- #
- # my $class =
- # shift || die "Cannot add broadcast: No class identifier found";
- #
- # In any case if the user places a break at either the = or the ||
- # it should remain there.
- if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
- if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
- if ( $want_break_before{$token} && $i > 0 ) {
- $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
+ #------------------------------------------------------------
+ # Handle this token
+ #------------------------------------------------------------
- # keep bond strength of a token and its following blank
- # the same
- if ( $types_to_go[ $i - 1 ] eq 'b' && $i > 2 ) {
- $bond_strength_to_go[ $i - 2 ] -= $delta_bias;
- }
- }
- else {
- $bond_str -= $delta_bias;
- }
- }
- }
+ $current_depth = $depth;
- # good to break after end of code blocks
- if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
+ # handle comma-arrow
+ if ( $type eq '=>' ) {
+ next if ( $last_nonblank_type eq '=>' );
+ next if $rOpts_break_at_old_comma_breakpoints;
+ next if $rOpts_comma_arrow_breakpoints == 3;
+ $want_comma_break[$depth] = 1;
+ $index_before_arrow[$depth] = $i_last_nonblank_token;
+ next;
+ } ## end if ( $type eq '=>' )
- $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
- $code_bias += $delta_bias;
+ elsif ( $type eq '.' ) {
+ $last_dot_index[$depth] = $i;
}
- if ( $type eq 'k' ) {
-
- # allow certain control keywords to stand out
- if ( $next_nonblank_type eq 'k'
- && $is_last_next_redo_return{$token} )
- {
- $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
- }
+ # Turn off alignment if we are sure that this is not a list
+ # environment. To be safe, we will do this if we see certain
+ # non-list tokens, such as ';', and also the environment is
+ # not a list. Note that '=' could be in any of the = operators
+ # (lextest.t). We can't just use the reported environment
+ # because it can be incorrect in some cases.
+ elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
+ && $container_environment_to_go[$i] ne 'LIST' )
+ {
+ $dont_align[$depth] = 1;
+ $want_comma_break[$depth] = 0;
+ $index_before_arrow[$depth] = -1;
+ } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
- # Don't break after keyword my. This is a quick fix for a
- # rare problem with perl. An example is this line from file
- # Container.pm:
+ # now just handle any commas
+ next unless ( $type eq ',' );
- # foreach my $question( Debian::DebConf::ConfigDb::gettree(
- # $this->{'question'} ) )
+ $last_dot_index[$depth] = undef;
+ $last_comma_index[$depth] = $i;
- if ( $token eq 'my' ) {
- $bond_str = NO_BREAK;
+ # break here if this comma follows a '=>'
+ # but not if there is a side comment after the comma
+ if ( $want_comma_break[$depth] ) {
+
+ if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
+ if ($rOpts_comma_arrow_breakpoints) {
+ $want_comma_break[$depth] = 0;
+ next;
+ }
}
- }
+ $self->set_forced_breakpoint($i)
+ unless ( $next_nonblank_type eq '#' );
- # good to break before 'if', 'unless', etc
- if ( $is_if_brace_follower{$next_nonblank_token} ) {
- $bond_str = VERY_WEAK;
- }
+ # break before the previous token if it looks safe
+ # Example of something that we will not try to break before:
+ # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
+ # Also we don't want to break at a binary operator (like +):
+ # $c->createOval(
+ # $x + $R, $y +
+ # $R => $x - $R,
+ # $y - $R, -fill => 'black',
+ # );
+ my $ibreak = $index_before_arrow[$depth] - 1;
+ if ( $ibreak > 0
+ && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
+ {
+ if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
+ if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
+ if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
- if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
+ # don't break pointer calls, such as the following:
+ # File::Spec->curdir => 1,
+ # (This is tokenized as adjacent 'w' tokens)
+ ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
- # FIXME: needs more testing
- if ( $is_keyword_returning_list{$next_nonblank_token} ) {
- $bond_str = $list_str if ( $bond_str > $list_str );
- }
+ # And don't break before a comma, as in the following:
+ # ( LONGER_THAN,=> 1,
+ # EIGHTY_CHARACTERS,=> 2,
+ # CAUSES_FORMATTING,=> 3,
+ # LIKE_THIS,=> 4,
+ # );
+ # This example is for -tso but should be general rule
+ if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
+ && $tokens_to_go[ $ibreak + 1 ] ne ',' )
+ {
+ $self->set_forced_breakpoint($ibreak);
+ }
+ } ## end if ( $types_to_go[$ibreak...])
+ } ## end if ( $ibreak > 0 && $tokens_to_go...)
- # keywords like 'unless', 'if', etc, within statements
- # make good breaks
- if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
- $bond_str = VERY_WEAK / 1.05;
- }
- }
+ $want_comma_break[$depth] = 0;
+ $index_before_arrow[$depth] = -1;
- # try not to break before a comma-arrow
- elsif ( $next_nonblank_type eq '=>' ) {
- if ( $bond_str < STRONG ) { $bond_str = STRONG }
+ # handle list which mixes '=>'s and ','s:
+ # treat any list items so far as an interrupted list
+ $interrupted_list[$depth] = 1;
+ next;
+ } ## end if ( $want_comma_break...)
+
+ # break after all commas above starting depth
+ if ( $depth < $starting_depth && !$dont_align[$depth] ) {
+ $self->set_forced_breakpoint($i)
+ unless ( $next_nonblank_type eq '#' );
+ next;
}
- #---------------------------------------------------------------
- # Additional hardwired NOBREAK rules
- #---------------------------------------------------------------
+ # add this comma to the list..
+ my $item_count = $item_count_stack[$depth];
+ if ( $item_count == 0 ) {
- # map1.t -- correct for a quirk in perl
- if ( $token eq '('
- && $next_nonblank_type eq 'i'
- && $last_nonblank_type eq 'k'
- && $is_sort_map_grep{$last_nonblank_token} )
+ # but do not form a list with no opening structure
+ # for example:
- # /^(sort|map|grep)$/ )
- {
- $bond_str = NO_BREAK;
- }
+ # open INFILE_COPY, ">$input_file_copy"
+ # or die ("very long message");
- # extrude.t: do not break before paren at:
- # -l pid_filename(
- if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
- $bond_str = NO_BREAK;
+ if ( ( $opening_structure_index_stack[$depth] < 0 )
+ && $container_environment_to_go[$i] eq 'BLOCK' )
+ {
+ $dont_align[$depth] = 1;
+ }
+ } ## end if ( $item_count == 0 )
+
+ $comma_index[$depth][$item_count] = $i;
+ ++$item_count_stack[$depth];
+ if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
+ $identifier_count_stack[$depth]++;
}
+ } ## end while ( ++$i <= $max_index_to_go)
- # in older version of perl, use strict can cause problems with
- # breaks before bare words following opening parens. For example,
- # this will fail under older versions if a break is made between
- # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
- # command"); close MAIL;
- if ( $type eq '{' ) {
+ #-------------------------------------------
+ # end of loop over all tokens in this batch
+ #-------------------------------------------
- if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
+ # set breaks for any unfinished lists ..
+ for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
- # but it's fine to break if the word is followed by a '=>'
- # or if it is obviously a sub call
- my $i_next_next_nonblank = $i_next_nonblank + 1;
- my $next_next_type = $types_to_go[$i_next_next_nonblank];
- if ( $next_next_type eq 'b'
- && $i_next_nonblank < $max_index_to_go )
- {
- $i_next_next_nonblank++;
- $next_next_type = $types_to_go[$i_next_next_nonblank];
- }
+ $interrupted_list[$dd] = 1;
+ $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
+ $self->set_comma_breakpoints($dd);
+ $self->set_logical_breakpoints($dd)
+ if ( $has_old_logical_breakpoints[$dd] );
+ $self->set_for_semicolon_breakpoints($dd);
- # We'll check for an old breakpoint and keep a leading
- # bareword if it was that way in the input file.
- # Presumably it was ok that way. For example, the
- # following would remain unchanged:
- #
- # @months = (
- # January, February, March, April,
- # May, June, July, August,
- # September, October, November, December,
- # );
- #
- # This should be sufficient:
- if (
- !$old_breakpoint_to_go[$i]
- && ( $next_next_type eq ','
- || $next_next_type eq '}' )
- )
- {
- $bond_str = NO_BREAK;
- }
- }
- }
+ # break open container...
+ my $i_opening = $opening_structure_index_stack[$dd];
+ $self->set_forced_breakpoint($i_opening)
+ unless (
+ is_unbreakable_container($dd)
- # Do not break between a possible filehandle and a ? or / and do
- # not introduce a break after it if there is no blank
- # (extrude.t)
- elsif ( $type eq 'Z' ) {
+ # Avoid a break which would place an isolated ' or "
+ # on a line
+ || ( $type eq 'Q'
+ && $i_opening >= $max_index_to_go - 2
+ && $token =~ /^['"]$/ )
+ );
+ } ## end for ( my $dd = $current_depth...)
- # don't break..
- if (
+ # Return a flag indicating if the input file had some good breakpoints.
+ # This flag will be used to force a break in a line shorter than the
+ # allowed line length.
+ if ( $has_old_logical_breakpoints[$current_depth] ) {
+ $saw_good_breakpoint = 1;
+ }
- # if there is no blank and we do not want one. Examples:
- # print $x++ # do not break after $x
- # print HTML"HELLO" # break ok after HTML
- (
- $next_type ne 'b'
- && defined( $want_left_space{$next_type} )
- && $want_left_space{$next_type} == WS_NO
- )
+ # A complex line with one break at an = has a good breakpoint.
+ # This is not complex ($total_depth_variation=0):
+ # $res1
+ # = 10;
+ #
+ # This is complex ($total_depth_variation=6):
+ # $res2 =
+ # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
+ elsif ($i_old_assignment_break
+ && $total_depth_variation > 4
+ && $old_breakpoint_count == 1 )
+ {
+ $saw_good_breakpoint = 1;
+ } ## end elsif ( $i_old_assignment_break...)
- # or we might be followed by the start of a quote
- || $next_nonblank_type =~ /^[\/\?]$/
- )
- {
- $bond_str = NO_BREAK;
- }
- }
+ return $saw_good_breakpoint;
+ } ## end sub scan_list
+} ## end closure scan_list
- # Breaking before a ? before a quote can cause trouble if
- # they are not separated by a blank.
- # Example: a syntax error occurs if you break before the ? here
- # my$logic=join$all?' && ':' || ',@regexps;
- # From: Professional_Perl_Programming_Code/multifind.pl
- if ( $next_nonblank_type eq '?' ) {
- $bond_str = NO_BREAK
- if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
- }
+sub find_token_starting_list {
- # Breaking before a . followed by a number
- # can cause trouble if there is no intervening space
- # Example: a syntax error occurs if you break before the .2 here
- # $str .= pack($endian.2, ensurrogate($ord));
- # From: perl58/Unicode.pm
- elsif ( $next_nonblank_type eq '.' ) {
- $bond_str = NO_BREAK
- if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
- }
+ # When testing to see if a block will fit on one line, some
+ # previous token(s) may also need to be on the line; particularly
+ # if this is a sub call. So we will look back at least one
+ # token. NOTE: This isn't perfect, but not critical, because
+ # if we mis-identify a block, it will be wrapped and therefore
+ # fixed the next time it is formatted.
+ my ( $self, $i_opening_paren ) = @_;
+ my $i_opening_minus = $i_opening_paren;
+ my $im1 = $i_opening_paren - 1;
+ my $im2 = $i_opening_paren - 2;
+ my $im3 = $i_opening_paren - 3;
+ my $typem1 = $types_to_go[$im1];
+ my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';
- my $bond_str_2 = $bond_str;
+ if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
+ $i_opening_minus = $i_opening_paren;
+ }
+ elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
+ $i_opening_minus = $im1 if $im1 >= 0;
- #---------------------------------------------------------------
- # End of hardwired rules
- #---------------------------------------------------------------
+ # walk back to improve length estimate
+ for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
+ last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
+ $i_opening_minus = $j;
+ }
+ if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
+ }
+ elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
+ elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
+ $i_opening_minus = $im2;
+ }
+ return $i_opening_minus;
+}
- #---------------------------------------------------------------
- # Bond Strength Section 3:
- # Apply table rules. These have priority over the above
- # hardwired rules.
- #---------------------------------------------------------------
+{ ## begin closure set_comma_breakpoints_do
- my $tabulated_bond_str;
- my $ltype = $type;
- my $rtype = $next_nonblank_type;
- if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token }
- if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) {
- $rtype = $next_nonblank_type . $next_nonblank_token;
- }
+ my %is_keyword_with_special_leading_term;
- if ( $binary_bond_strength{$ltype}{$rtype} ) {
- $bond_str = $binary_bond_strength{$ltype}{$rtype};
- $tabulated_bond_str = $bond_str;
- }
+ BEGIN {
- if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
- $bond_str = NO_BREAK;
- $tabulated_bond_str = $bond_str;
- }
- my $bond_str_3 = $bond_str;
+ # These keywords have prototypes which allow a special leading item
+ # followed by a list
+ my @q =
+ qw(formline grep kill map printf sprintf push chmod join pack unshift);
+ @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
+ }
- # If the hardwired rules conflict with the tabulated bond
- # strength then there is an inconsistency that should be fixed
- $DEBUG_BOND
- && $tabulated_bond_str
- && $bond_str_1
- && $bond_str_1 != $bond_str_2
- && $bond_str_2 != $tabulated_bond_str
- && do {
- print STDERR
-"BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
- };
+ my $DEBUG_SPARSE;
- #-----------------------------------------------------------------
- # Bond Strength Section 4:
- # Modify strengths of certain tokens which often occur in sequence
- # by adding a small bias to each one in turn so that the breaks
- # occur from left to right.
- #
- # Note that we only changing strengths by small amounts here,
- # and usually increasing, so we should not be altering any NO_BREAKs.
- # Other routines which check for NO_BREAKs will use a tolerance
- # of one to avoid any problem.
- #-----------------------------------------------------------------
+ sub set_comma_breakpoints_do {
- # The bias tables use special keys
- my $left_key = bias_table_key( $type, $token );
- my $right_key =
- bias_table_key( $next_nonblank_type, $next_nonblank_token );
+ # Given a list with some commas, set breakpoints at some of the
+ # commas, if necessary, to make it easy to read.
- # add any bias set by sub scan_list at old comma break points.
- if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] }
+ my ( $self, %input_hash ) = @_;
- # bias left token
- elsif ( defined( $bias{$left_key} ) ) {
- if ( !$want_break_before{$left_key} ) {
- $bias{$left_key} += $delta_bias;
- $bond_str += $bias{$left_key};
- }
- }
+ my $depth = $input_hash{depth};
+ my $i_opening_paren = $input_hash{i_opening_paren};
+ my $i_closing_paren = $input_hash{i_closing_paren};
+ my $item_count = $input_hash{item_count};
+ my $identifier_count = $input_hash{identifier_count};
+ my $rcomma_index = $input_hash{rcomma_index};
+ my $next_nonblank_type = $input_hash{next_nonblank_type};
+ my $list_type = $input_hash{list_type};
+ my $interrupted = $input_hash{interrupted};
+ my $rdo_not_break_apart = $input_hash{rdo_not_break_apart};
+ my $must_break_open = $input_hash{must_break_open};
+ my $has_broken_sublist = $input_hash{has_broken_sublist};
- # bias right token
- if ( defined( $bias{$right_key} ) ) {
- if ( $want_break_before{$right_key} ) {
+ # nothing to do if no commas seen
+ return if ( $item_count < 1 );
- # for leading '.' align all but 'short' quotes; the idea
- # is to not place something like "\n" on a single line.
- if ( $right_key eq '.' ) {
- unless (
- $last_nonblank_type eq '.'
- && ( $token_length <=
- $rOpts_short_concatenation_item_length )
- && ( !$is_closing_token{$token} )
- )
- {
- $bias{$right_key} += $delta_bias;
- }
- }
- else {
- $bias{$right_key} += $delta_bias;
- }
- $bond_str += $bias{$right_key};
- }
- }
- my $bond_str_4 = $bond_str;
+ my $rOpts_break_at_old_comma_breakpoints =
+ $rOpts->{'break-at-old-comma-breakpoints'};
+ my $rOpts_maximum_fields_per_table =
+ $rOpts->{'maximum-fields-per-table'};
- #---------------------------------------------------------------
- # Bond Strength Section 5:
- # Fifth Approximation.
- # Take nesting depth into account by adding the nesting depth
- # to the bond strength.
- #---------------------------------------------------------------
- my $strength;
+ my $i_first_comma = $rcomma_index->[0];
+ my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
+ my $i_last_comma = $i_true_last_comma;
+ if ( $i_last_comma >= $max_index_to_go ) {
+ $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
+ return if ( $item_count < 1 );
+ }
- if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
- if ( $total_nesting_depth > 0 ) {
- $strength = $bond_str + $total_nesting_depth;
- }
- else {
- $strength = $bond_str;
- }
- }
- else {
- $strength = NO_BREAK;
+ #---------------------------------------------------------------
+ # find lengths of all items in the list to calculate page layout
+ #---------------------------------------------------------------
+ my $comma_count = $item_count;
+ my @item_lengths;
+ my @i_term_begin;
+ my @i_term_end;
+ my @i_term_comma;
+ my $i_prev_plus;
+ my @max_length = ( 0, 0 );
+ my $first_term_length;
+ my $i = $i_opening_paren;
+ my $is_odd = 1;
- # For critical code such as lines with here targets we must
- # be absolutely sure that we do not allow a break. So for
- # these the nobreak flag exceeds 1 as a signal. Otherwise we
- # can run into trouble when small tolerances are added.
- $strength += 1 if ( $nobreak_to_go[$i] > 1 );
- }
+ foreach my $j ( 0 .. $comma_count - 1 ) {
+ $is_odd = 1 - $is_odd;
+ $i_prev_plus = $i + 1;
+ $i = $rcomma_index->[$j];
- #---------------------------------------------------------------
- # Bond Strength Section 6:
- # Sixth Approximation. Welds.
- #---------------------------------------------------------------
+ my $i_term_end =
+ ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
+ my $i_term_begin =
+ ( $types_to_go[$i_prev_plus] eq 'b' )
+ ? $i_prev_plus + 1
+ : $i_prev_plus;
+ push @i_term_begin, $i_term_begin;
+ push @i_term_end, $i_term_end;
+ push @i_term_comma, $i;
- # Do not allow a break within welds,
- if ( $self->weld_len_right_to_go($i) ) { $strength = NO_BREAK }
+ # note: currently adding 2 to all lengths (for comma and space)
+ my $length =
+ 2 + token_sequence_length( $i_term_begin, $i_term_end );
+ push @item_lengths, $length;
- # But encourage breaking after opening welded tokens
- elsif ($self->weld_len_left_to_go($i)
- && $is_opening_token{$token} )
- {
- $strength -= 1;
+ if ( $j == 0 ) {
+ $first_term_length = $length;
}
+ else {
- # always break after side comment
- if ( $type eq '#' ) { $strength = 0 }
+ if ( $length > $max_length[$is_odd] ) {
+ $max_length[$is_odd] = $length;
+ }
+ }
+ }
- $bond_strength_to_go[$i] = $strength;
+ # now we have to make a distinction between the comma count and item
+ # count, because the item count will be one greater than the comma
+ # count if the last item is not terminated with a comma
+ my $i_b =
+ ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
+ ? $i_last_comma + 1
+ : $i_last_comma;
+ my $i_e =
+ ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
+ ? $i_closing_paren - 2
+ : $i_closing_paren - 1;
+ my $i_effective_last_comma = $i_last_comma;
- $DEBUG_BOND && do {
- my $str = substr( $token, 0, 15 );
- $str .= ' ' x ( 16 - length($str) );
- print STDOUT
-"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
- };
- } ## end main loop
- return;
- } ## end sub set_bond_strengths
-} ## end closure set_bond_strengths
+ my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
-sub pad_array_to_go {
+ if ( $last_item_length > 0 ) {
- # To simplify coding in scan_list and set_bond_strengths, it helps
- # to create some extra blank tokens at the end of the arrays
- # FIXME: it would be nice to eliminate the need for this routine.
- my ($self) = @_;
- $tokens_to_go[ $max_index_to_go + 1 ] = '';
- $tokens_to_go[ $max_index_to_go + 2 ] = '';
- $types_to_go[ $max_index_to_go + 1 ] = 'b';
- $types_to_go[ $max_index_to_go + 2 ] = 'b';
- $nesting_depth_to_go[ $max_index_to_go + 1 ] =
- $nesting_depth_to_go[$max_index_to_go];
+ # add 2 to length because other lengths include a comma and a blank
+ $last_item_length += 2;
+ push @item_lengths, $last_item_length;
+ push @i_term_begin, $i_b + 1;
+ push @i_term_end, $i_e;
+ push @i_term_comma, undef;
- # /^[R\}\)\]]$/
- if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
- if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
+ my $i_odd = $item_count % 2;
- # shouldn't happen:
- unless ( get_saw_brace_error() ) {
- warning(
-"Program bug in pad_array_to_go: hit nesting error which should have been caught\n"
- );
- report_definite_bug();
+ if ( $last_item_length > $max_length[$i_odd] ) {
+ $max_length[$i_odd] = $last_item_length;
}
- }
- else {
- $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
- }
- }
- # /^[L\{\(\[]$/
- elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
- $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
- }
- return;
-}
+ $item_count++;
+ $i_effective_last_comma = $i_e + 1;
-{ ## begin closure scan_list
+ if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
+ $identifier_count++;
+ }
+ }
- # These routines and variables are involved in finding good
- # places to break long lists.
+ #---------------------------------------------------------------
+ # End of length calculations
+ #---------------------------------------------------------------
- my (
- $block_type, $current_depth,
- $depth, $i,
- $i_last_nonblank_token, $last_colon_sequence_number,
- $last_nonblank_token, $last_nonblank_type,
- $last_nonblank_block_type, $last_old_breakpoint_count,
- $minimum_depth, $next_nonblank_block_type,
- $next_nonblank_token, $next_nonblank_type,
- $old_breakpoint_count, $starting_breakpoint_count,
- $starting_depth, $token,
- $type, $type_sequence,
- );
+ #---------------------------------------------------------------
+ # Compound List Rule 1:
+ # Break at (almost) every comma for a list containing a broken
+ # sublist. This has higher priority than the Interrupted List
+ # Rule.
+ #---------------------------------------------------------------
+ if ($has_broken_sublist) {
- my (
- @breakpoint_stack, @breakpoint_undo_stack,
- @comma_index, @container_type,
- @identifier_count_stack, @index_before_arrow,
- @interrupted_list, @item_count_stack,
- @last_comma_index, @last_dot_index,
- @last_nonblank_type, @old_breakpoint_count_stack,
- @opening_structure_index_stack, @rfor_semicolon_list,
- @has_old_logical_breakpoints, @rand_or_list,
- @i_equals,
- );
+ # Break at every comma except for a comma between two
+ # simple, small terms. This prevents long vertical
+ # columns of, say, just 0's.
+ my $small_length = 10; # 2 + actual maximum length wanted
- # these arrays must retain values between calls
- my ( @has_broken_sublist, @dont_align, @want_comma_break );
+ # We'll insert a break in long runs of small terms to
+ # allow alignment in uniform tables.
+ my $skipped_count = 0;
+ my $columns = table_columns_available($i_first_comma);
+ my $fields = int( $columns / $small_length );
+ if ( $rOpts_maximum_fields_per_table
+ && $fields > $rOpts_maximum_fields_per_table )
+ {
+ $fields = $rOpts_maximum_fields_per_table;
+ }
+ my $max_skipped_count = $fields - 1;
- sub initialize_scan_list {
- @dont_align = ();
- @has_broken_sublist = ();
- @want_comma_break = ();
- return;
- }
+ my $is_simple_last_term = 0;
+ my $is_simple_next_term = 0;
+ foreach my $j ( 0 .. $item_count ) {
+ $is_simple_last_term = $is_simple_next_term;
+ $is_simple_next_term = 0;
+ if ( $j < $item_count
+ && $i_term_end[$j] == $i_term_begin[$j]
+ && $item_lengths[$j] <= $small_length )
+ {
+ $is_simple_next_term = 1;
+ }
+ next if $j == 0;
+ if ( $is_simple_last_term
+ && $is_simple_next_term
+ && $skipped_count < $max_skipped_count )
+ {
+ $skipped_count++;
+ }
+ else {
+ $skipped_count = 0;
+ my $i = $i_term_comma[ $j - 1 ];
+ last unless defined $i;
+ $self->set_forced_breakpoint($i);
+ }
+ }
- # routine to define essential variables when we go 'up' to
- # a new depth
- sub check_for_new_minimum_depth {
- my $depth = shift;
- if ( $depth < $minimum_depth ) {
+ # always break at the last comma if this list is
+ # interrupted; we wouldn't want to leave a terminal '{', for
+ # example.
+ if ($interrupted) {
+ $self->set_forced_breakpoint($i_true_last_comma);
+ }
+ return;
+ }
- $minimum_depth = $depth;
+#my ( $a, $b, $c ) = caller();
+#print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
+#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
+#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
- # these arrays need not retain values between calls
- $breakpoint_stack[$depth] = $starting_breakpoint_count;
- $container_type[$depth] = "";
- $identifier_count_stack[$depth] = 0;
- $index_before_arrow[$depth] = -1;
- $interrupted_list[$depth] = 1;
- $item_count_stack[$depth] = 0;
- $last_nonblank_type[$depth] = "";
- $opening_structure_index_stack[$depth] = -1;
+ #---------------------------------------------------------------
+ # Interrupted List Rule:
+ # A list is forced to use old breakpoints if it was interrupted
+ # by side comments or blank lines, or requested by user.
+ #---------------------------------------------------------------
+ if ( $rOpts_break_at_old_comma_breakpoints
+ || $interrupted
+ || $i_opening_paren < 0 )
+ {
+ $self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
+ return;
+ }
- $breakpoint_undo_stack[$depth] = undef;
- $comma_index[$depth] = undef;
- $last_comma_index[$depth] = undef;
- $last_dot_index[$depth] = undef;
- $old_breakpoint_count_stack[$depth] = undef;
- $has_old_logical_breakpoints[$depth] = 0;
- $rand_or_list[$depth] = [];
- $rfor_semicolon_list[$depth] = [];
- $i_equals[$depth] = -1;
+ #---------------------------------------------------------------
+ # Looks like a list of items. We have to look at it and size it up.
+ #---------------------------------------------------------------
- # these arrays must retain values between calls
- if ( !defined( $has_broken_sublist[$depth] ) ) {
- $dont_align[$depth] = 0;
- $has_broken_sublist[$depth] = 0;
- $want_comma_break[$depth] = 0;
- }
- }
- return;
- }
+ my $opening_token = $tokens_to_go[$i_opening_paren];
+ my $opening_environment =
+ $container_environment_to_go[$i_opening_paren];
- # routine to decide which commas to break at within a container;
- # returns:
- # $bp_count = number of comma breakpoints set
- # $do_not_break_apart = a flag indicating if container need not
- # be broken open
- sub set_comma_breakpoints {
+ #-------------------------------------------------------------------
+ # Return if this will fit on one line
+ #-------------------------------------------------------------------
- my ( $self, $dd ) = @_;
- my $bp_count = 0;
- my $do_not_break_apart = 0;
+ my $i_opening_minus = $self->find_token_starting_list($i_opening_paren);
+ return
+ unless $self->excess_line_length( $i_opening_minus, $i_closing_paren )
+ > 0;
- # anything to do?
- if ( $item_count_stack[$dd] ) {
+ #-------------------------------------------------------------------
+ # Now we know that this block spans multiple lines; we have to set
+ # at least one breakpoint -- real or fake -- as a signal to break
+ # open any outer containers.
+ #-------------------------------------------------------------------
+ set_fake_breakpoint();
- # handle commas not in containers...
- if ( $dont_align[$dd] ) {
- $self->do_uncontained_comma_breaks($dd);
- }
+ # be sure we do not extend beyond the current list length
+ if ( $i_effective_last_comma >= $max_index_to_go ) {
+ $i_effective_last_comma = $max_index_to_go - 1;
+ }
- # handle commas within containers...
- else {
- my $fbc = get_forced_breakpoint_count();
+ # Set a flag indicating if we need to break open to keep -lp
+ # items aligned. This is necessary if any of the list terms
+ # exceeds the available space after the '('.
+ my $need_lp_break_open = $must_break_open;
+ if ( $rOpts_line_up_parentheses && !$must_break_open ) {
+ my $columns_if_unbroken =
+ maximum_line_length($i_opening_minus) -
+ total_line_length( $i_opening_minus, $i_opening_paren );
+ $need_lp_break_open =
+ ( $max_length[0] > $columns_if_unbroken )
+ || ( $max_length[1] > $columns_if_unbroken )
+ || ( $first_term_length > $columns_if_unbroken );
+ }
- # always open comma lists not preceded by keywords,
- # barewords, identifiers (that is, anything that doesn't
- # look like a function call)
- my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
+ # Specify if the list must have an even number of fields or not.
+ # It is generally safest to assume an even number, because the
+ # list items might be a hash list. But if we can be sure that
+ # it is not a hash, then we can allow an odd number for more
+ # flexibility.
+ my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
- $self->set_comma_breakpoints_do(
- depth => $dd,
- i_opening_paren => $opening_structure_index_stack[$dd],
- i_closing_paren => $i,
- item_count => $item_count_stack[$dd],
- identifier_count => $identifier_count_stack[$dd],
- rcomma_index => $comma_index[$dd],
- next_nonblank_type => $next_nonblank_type,
- list_type => $container_type[$dd],
- interrupted => $interrupted_list[$dd],
- rdo_not_break_apart => \$do_not_break_apart,
- must_break_open => $must_break_open,
- has_broken_sublist => $has_broken_sublist[$dd],
- );
- $bp_count = get_forced_breakpoint_count() - $fbc;
- $do_not_break_apart = 0 if $must_break_open;
- }
+ if ( $identifier_count >= $item_count - 1
+ || $is_assignment{$next_nonblank_type}
+ || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
+ )
+ {
+ $odd_or_even = 1;
}
- return ( $bp_count, $do_not_break_apart );
- }
- sub do_uncontained_comma_breaks {
+ # do we have a long first term which should be
+ # left on a line by itself?
+ my $use_separate_first_term = (
+ $odd_or_even == 1 # only if we can use 1 field/line
+ && $item_count > 3 # need several items
+ && $first_term_length >
+ 2 * $max_length[0] - 2 # need long first term
+ && $first_term_length >
+ 2 * $max_length[1] - 2 # need long first term
+ );
- # Handle commas not in containers...
- # This is a catch-all routine for commas that we
- # don't know what to do with because the don't fall
- # within containers. We will bias the bond strength
- # to break at commas which ended lines in the input
- # file. This usually works better than just trying
- # to put as many items on a line as possible. A
- # downside is that if the input file is garbage it
- # won't work very well. However, the user can always
- # prevent following the old breakpoints with the
- # -iob flag.
- my ( $self, $dd ) = @_;
- my $bias = -.01;
- my $old_comma_break_count = 0;
- foreach my $ii ( @{ $comma_index[$dd] } ) {
- if ( $old_breakpoint_to_go[$ii] ) {
- $old_comma_break_count++;
- $bond_strength_to_go[$ii] = $bias;
+ # or do we know from the type of list that the first term should
+ # be placed alone?
+ if ( !$use_separate_first_term ) {
+ if ( $is_keyword_with_special_leading_term{$list_type} ) {
+ $use_separate_first_term = 1;
- # reduce bias magnitude to force breaks in order
- $bias *= 0.99;
+ # should the container be broken open?
+ if ( $item_count < 3 ) {
+ if ( $i_first_comma - $i_opening_paren < 4 ) {
+ ${$rdo_not_break_apart} = 1;
+ }
+ }
+ elsif ($first_term_length < 20
+ && $i_first_comma - $i_opening_paren < 4 )
+ {
+ my $columns = table_columns_available($i_first_comma);
+ if ( $first_term_length < $columns ) {
+ ${$rdo_not_break_apart} = 1;
+ }
+ }
}
}
- # Also put a break before the first comma if
- # (1) there was a break there in the input, and
- # (2) there was exactly one old break before the first comma break
- # (3) OLD: there are multiple old comma breaks
- # (3) NEW: there are one or more old comma breaks (see return example)
- #
- # For example, we will follow the user and break after
- # 'print' in this snippet:
- # print
- # "conformability (Not the same dimension)\n",
- # "\t", $have, " is ", text_unit($hu), "\n",
- # "\t", $want, " is ", text_unit($wu), "\n",
- # ;
- #
- # Another example, just one comma, where we will break after
- # the return:
- # return
- # $x * cos($a) - $y * sin($a),
- # $x * sin($a) + $y * cos($a);
+ # if so,
+ if ($use_separate_first_term) {
- # Breaking a print statement:
- # print SAVEOUT
- # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
- # ( $? & 128 ) ? " -- core dumped" : "", "\n";
- #
- # But we will not force a break after the opening paren here
- # (causes a blinker):
- # $heap->{stream}->set_output_filter(
- # poe::filter::reference->new('myotherfreezer') ),
- # ;
- #
- my $i_first_comma = $comma_index[$dd]->[0];
- if ( $old_breakpoint_to_go[$i_first_comma] ) {
- my $level_comma = $levels_to_go[$i_first_comma];
- my $ibreak = -1;
- my $obp_count = 0;
- for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
- if ( $old_breakpoint_to_go[$ii] ) {
- $obp_count++;
- last if ( $obp_count > 1 );
- $ibreak = $ii
- if ( $levels_to_go[$ii] == $level_comma );
- }
- }
+ # ..set a break and update starting values
+ $use_separate_first_term = 1;
+ $self->set_forced_breakpoint($i_first_comma);
+ $i_opening_paren = $i_first_comma;
+ $i_first_comma = $rcomma_index->[1];
+ $item_count--;
+ return if $comma_count == 1;
+ shift @item_lengths;
+ shift @i_term_begin;
+ shift @i_term_end;
+ shift @i_term_comma;
+ }
- # Changed rule from multiple old commas to just one here:
- if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
- {
- # Do not to break before an opening token because
- # it can lead to "blinkers".
- my $ibreakm = $ibreak;
- $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
- if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ )
- {
- $self->set_forced_breakpoint($ibreak);
- }
+ # if not, update the metrics to include the first term
+ else {
+ if ( $first_term_length > $max_length[0] ) {
+ $max_length[0] = $first_term_length;
}
}
- return;
- }
- my %is_logical_container;
+ # Field width parameters
+ my $pair_width = ( $max_length[0] + $max_length[1] );
+ my $max_width =
+ ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
- BEGIN {
- my @q = qw# if elsif unless while and or err not && | || ? : ! #;
- @is_logical_container{@q} = (1) x scalar(@q);
- }
+ # Number of free columns across the page width for laying out tables
+ my $columns = table_columns_available($i_first_comma);
- sub set_for_semicolon_breakpoints {
- my ( $self, $dd ) = @_;
- foreach ( @{ $rfor_semicolon_list[$dd] } ) {
- $self->set_forced_breakpoint($_);
+ # Estimated maximum number of fields which fit this space
+ # This will be our first guess
+ my $number_of_fields_max =
+ maximum_number_of_fields( $columns, $odd_or_even, $max_width,
+ $pair_width );
+ my $number_of_fields = $number_of_fields_max;
+
+ # Find the best-looking number of fields
+ # and make this our second guess if possible
+ my ( $number_of_fields_best, $ri_ragged_break_list,
+ $new_identifier_count )
+ = $self->study_list_complexity( \@i_term_begin, \@i_term_end,
+ \@item_lengths, $max_width );
+
+ if ( $number_of_fields_best != 0
+ && $number_of_fields_best < $number_of_fields_max )
+ {
+ $number_of_fields = $number_of_fields_best;
}
- return;
- }
- sub set_logical_breakpoints {
- my ( $self, $dd ) = @_;
+ # ----------------------------------------------------------------------
+ # If we are crowded and the -lp option is being used, try to
+ # undo some indentation
+ # ----------------------------------------------------------------------
if (
- $item_count_stack[$dd] == 0
- && $is_logical_container{ $container_type[$dd] }
-
- || $has_old_logical_breakpoints[$dd]
+ $rOpts_line_up_parentheses
+ && (
+ $number_of_fields == 0
+ || ( $number_of_fields == 1
+ && $number_of_fields != $number_of_fields_best )
+ )
)
{
+ my $available_spaces =
+ $self->get_available_spaces_to_go($i_first_comma);
+ if ( $available_spaces > 0 ) {
- # Look for breaks in this order:
- # 0 1 2 3
- # or and || &&
- foreach my $i ( 0 .. 3 ) {
- if ( $rand_or_list[$dd][$i] ) {
- foreach ( @{ $rand_or_list[$dd][$i] } ) {
- $self->set_forced_breakpoint($_);
- }
+ my $spaces_wanted = $max_width - $columns; # for 1 field
- # break at any 'if' and 'unless' too
- foreach ( @{ $rand_or_list[$dd][4] } ) {
- $self->set_forced_breakpoint($_);
- }
- $rand_or_list[$dd] = [];
- last;
+ if ( $number_of_fields_best == 0 ) {
+ $number_of_fields_best =
+ get_maximum_fields_wanted( \@item_lengths );
}
- }
- }
- return;
- }
-
- sub is_unbreakable_container {
-
- # never break a container of one of these types
- # because bad things can happen (map1.t)
- my $dd = shift;
- return $is_sort_map_grep{ $container_type[$dd] };
- }
- sub scan_list {
+ if ( $number_of_fields_best != 1 ) {
+ my $spaces_wanted_2 =
+ 1 + $pair_width - $columns; # for 2 fields
+ if ( $available_spaces > $spaces_wanted_2 ) {
+ $spaces_wanted = $spaces_wanted_2;
+ }
+ }
- my ($self) = @_;
+ if ( $spaces_wanted > 0 ) {
+ my $deleted_spaces =
+ $self->reduce_lp_indentation( $i_first_comma,
+ $spaces_wanted );
- # This routine is responsible for setting line breaks for all lists,
- # so that hierarchical structure can be displayed and so that list
- # items can be vertically aligned. The output of this routine is
- # stored in the array @forced_breakpoint_to_go, which is used to set
- # final breakpoints.
+ # redo the math
+ if ( $deleted_spaces > 0 ) {
+ $columns = table_columns_available($i_first_comma);
+ $number_of_fields_max =
+ maximum_number_of_fields( $columns, $odd_or_even,
+ $max_width, $pair_width );
+ $number_of_fields = $number_of_fields_max;
- # It is called once per batch if the batch is a list.
- my $rOpts_break_at_old_attribute_breakpoints =
- $rOpts->{'break-at-old-attribute-breakpoints'};
- my $rOpts_break_at_old_comma_breakpoints =
- $rOpts->{'break-at-old-comma-breakpoints'};
- my $rOpts_break_at_old_keyword_breakpoints =
- $rOpts->{'break-at-old-keyword-breakpoints'};
- my $rOpts_break_at_old_logical_breakpoints =
- $rOpts->{'break-at-old-logical-breakpoints'};
- my $rOpts_break_at_old_method_breakpoints =
- $rOpts->{'break-at-old-method-breakpoints'};
- my $rOpts_break_at_old_semicolon_breakpoints =
- $rOpts->{'break-at-old-semicolon-breakpoints'};
- my $rOpts_break_at_old_ternary_breakpoints =
- $rOpts->{'break-at-old-ternary-breakpoints'};
- my $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
+ if ( $number_of_fields_best == 1
+ && $number_of_fields >= 1 )
+ {
+ $number_of_fields = $number_of_fields_best;
+ }
+ }
+ }
+ }
+ }
- $starting_depth = $nesting_depth_to_go[0];
+ # try for one column if two won't work
+ if ( $number_of_fields <= 0 ) {
+ $number_of_fields = int( $columns / $max_width );
+ }
- $block_type = ' ';
- $current_depth = $starting_depth;
- $i = -1;
- $last_colon_sequence_number = -1;
- $last_nonblank_token = ';';
- $last_nonblank_type = ';';
- $last_nonblank_block_type = ' ';
- $last_old_breakpoint_count = 0;
- $minimum_depth = $current_depth + 1; # forces update in check below
- $old_breakpoint_count = 0;
- $starting_breakpoint_count = get_forced_breakpoint_count();
- $token = ';';
- $type = ';';
- $type_sequence = '';
+ # The user can place an upper bound on the number of fields,
+ # which can be useful for doing maintenance on tables
+ if ( $rOpts_maximum_fields_per_table
+ && $number_of_fields > $rOpts_maximum_fields_per_table )
+ {
+ $number_of_fields = $rOpts_maximum_fields_per_table;
+ }
- my $total_depth_variation = 0;
- my $i_old_assignment_break;
- my $depth_last = $starting_depth;
+ # How many columns (characters) and lines would this container take
+ # if no additional whitespace were added?
+ my $packed_columns = token_sequence_length( $i_opening_paren + 1,
+ $i_effective_last_comma + 1 );
+ if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
+ my $packed_lines = 1 + int( $packed_columns / $columns );
- check_for_new_minimum_depth($current_depth);
+ # are we an item contained in an outer list?
+ my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
- my $is_long_line = $self->excess_line_length( 0, $max_index_to_go ) > 0;
- my $want_previous_breakpoint = -1;
+ if ( $number_of_fields <= 0 ) {
- my $saw_good_breakpoint;
- my $i_line_end = -1;
- my $i_line_start = -1;
+# #---------------------------------------------------------------
+# # We're in trouble. We can't find a single field width that works.
+# # There is no simple answer here; we may have a single long list
+# # item, or many.
+# #---------------------------------------------------------------
+#
+# In many cases, it may be best to not force a break if there is just one
+# comma, because the standard continuation break logic will do a better
+# job without it.
+#
+# In the common case that all but one of the terms can fit
+# on a single line, it may look better not to break open the
+# containing parens. Consider, for example
+#
+# $color =
+# join ( '/',
+# sort { $color_value{$::a} <=> $color_value{$::b}; }
+# keys %colors );
+#
+# which will look like this with the container broken:
+#
+# $color = join (
+# '/',
+# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
+# );
+#
+# Here is an example of this rule for a long last term:
+#
+# log_message( 0, 256, 128,
+# "Number of routes in adj-RIB-in to be considered: $peercount" );
+#
+# And here is an example with a long first term:
+#
+# $s = sprintf(
+# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
+# $r, $pu, $ps, $cu, $cs, $tt
+# )
+# if $style eq 'all';
- # loop over all tokens in this batch
- while ( ++$i <= $max_index_to_go ) {
- if ( $type ne 'b' ) {
- $i_last_nonblank_token = $i - 1;
- $last_nonblank_type = $type;
- $last_nonblank_token = $token;
- $last_nonblank_block_type = $block_type;
- } ## end if ( $type ne 'b' )
- $type = $types_to_go[$i];
- $block_type = $block_type_to_go[$i];
- $token = $tokens_to_go[$i];
- $type_sequence = $type_sequence_to_go[$i];
- my $next_type = $types_to_go[ $i + 1 ];
- my $next_token = $tokens_to_go[ $i + 1 ];
- my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
- $next_nonblank_type = $types_to_go[$i_next_nonblank];
- $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
- $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
+ my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
+ my $long_last_term =
+ $self->excess_line_length( 0, $i_last_comma ) <= 0;
+ my $long_first_term =
+ $self->excess_line_length( $i_first_comma + 1, $max_index_to_go )
+ <= 0;
- # set break if flag was set
- if ( $want_previous_breakpoint >= 0 ) {
- $self->set_forced_breakpoint($want_previous_breakpoint);
- $want_previous_breakpoint = -1;
- }
+ # break at every comma ...
+ if (
- $last_old_breakpoint_count = $old_breakpoint_count;
- if ( $old_breakpoint_to_go[$i] ) {
- $i_line_end = $i;
- $i_line_start = $i_next_nonblank;
+ # if requested by user or is best looking
+ $number_of_fields_best == 1
- $old_breakpoint_count++;
+ # or if this is a sublist of a larger list
+ || $in_hierarchical_list
- # Break before certain keywords if user broke there and
- # this is a 'safe' break point. The idea is to retain
- # any preferred breaks for sequential list operations,
- # like a schwartzian transform.
- if ($rOpts_break_at_old_keyword_breakpoints) {
- if (
- $next_nonblank_type eq 'k'
- && $is_keyword_returning_list{$next_nonblank_token}
- && ( $type =~ /^[=\)\]\}Riw]$/
- || $type eq 'k'
- && $is_keyword_returning_list{$token} )
- )
- {
+ # or if multiple commas and we don't have a long first or last
+ # term
+ || ( $comma_count > 1
+ && !( $long_last_term || $long_first_term ) )
+ )
+ {
+ foreach ( 0 .. $comma_count - 1 ) {
+ $self->set_forced_breakpoint( $rcomma_index->[$_] );
+ }
+ }
+ elsif ($long_last_term) {
- # we actually have to set this break next time through
- # the loop because if we are at a closing token (such
- # as '}') which forms a one-line block, this break might
- # get undone.
- $want_previous_breakpoint = $i;
- } ## end if ( $next_nonblank_type...)
- } ## end if ($rOpts_break_at_old_keyword_breakpoints)
+ $self->set_forced_breakpoint($i_last_comma);
+ ${$rdo_not_break_apart} = 1 unless $must_break_open;
+ }
+ elsif ($long_first_term) {
- # Break before attributes if user broke there
- if ($rOpts_break_at_old_attribute_breakpoints) {
- if ( $next_nonblank_type eq 'A' ) {
- $want_previous_breakpoint = $i;
- }
- }
+ $self->set_forced_breakpoint($i_first_comma);
+ }
+ else {
- # remember an = break as possible good break point
- if ( $is_assignment{$type} ) {
- $i_old_assignment_break = $i;
- }
- elsif ( $is_assignment{$next_nonblank_type} ) {
- $i_old_assignment_break = $i_next_nonblank;
- }
- } ## end if ( $old_breakpoint_to_go...)
+ # let breaks be defined by default bond strength logic
+ }
+ return;
+ }
- next if ( $type eq 'b' );
- $depth = $nesting_depth_to_go[ $i + 1 ];
+ # --------------------------------------------------------
+ # We have a tentative field count that seems to work.
+ # How many lines will this require?
+ # --------------------------------------------------------
+ my $formatted_lines = $item_count / ($number_of_fields);
+ if ( $formatted_lines != int $formatted_lines ) {
+ $formatted_lines = 1 + int $formatted_lines;
+ }
- $total_depth_variation += abs( $depth - $depth_last );
- $depth_last = $depth;
+ # So far we've been trying to fill out to the right margin. But
+ # compact tables are easier to read, so let's see if we can use fewer
+ # fields without increasing the number of lines.
+ $number_of_fields =
+ compactify_table( $item_count, $number_of_fields, $formatted_lines,
+ $odd_or_even );
- # safety check - be sure we always break after a comment
- # Shouldn't happen .. an error here probably means that the
- # nobreak flag did not get turned off correctly during
- # formatting.
- if ( $type eq '#' ) {
- if ( $i != $max_index_to_go ) {
- warning(
-"Non-fatal program bug: backup logic needed to break after a comment\n"
- );
- report_definite_bug();
- $nobreak_to_go[$i] = 0;
- $self->set_forced_breakpoint($i);
- } ## end if ( $i != $max_index_to_go)
- } ## end if ( $type eq '#' )
+ # How many spaces across the page will we fill?
+ my $columns_per_line =
+ ( int $number_of_fields / 2 ) * $pair_width +
+ ( $number_of_fields % 2 ) * $max_width;
- # Force breakpoints at certain tokens in long lines.
- # Note that such breakpoints will be undone later if these tokens
- # are fully contained within parens on a line.
- if (
+ my $formatted_columns;
- # break before a keyword within a line
- $type eq 'k'
- && $i > 0
+ if ( $number_of_fields > 1 ) {
+ $formatted_columns =
+ ( $pair_width * ( int( $item_count / 2 ) ) +
+ ( $item_count % 2 ) * $max_width );
+ }
+ else {
+ $formatted_columns = $max_width * $item_count;
+ }
+ if ( $formatted_columns < $packed_columns ) {
+ $formatted_columns = $packed_columns;
+ }
- # if one of these keywords:
- && $token =~ /^(if|unless|while|until|for)$/
+ my $unused_columns = $formatted_columns - $packed_columns;
- # but do not break at something like '1 while'
- && ( $last_nonblank_type ne 'n' || $i > 2 )
+ # set some empirical parameters to help decide if we should try to
+ # align; high sparsity does not look good, especially with few lines
+ my $sparsity = ($unused_columns) / ($formatted_columns);
+ my $max_allowed_sparsity =
+ ( $item_count < 3 ) ? 0.1
+ : ( $packed_lines == 1 ) ? 0.15
+ : ( $packed_lines == 2 ) ? 0.4
+ : 0.7;
+
+ # Begin check for shortcut methods, which avoid treating a list
+ # as a table for relatively small parenthesized lists. These
+ # are usually easier to read if not formatted as tables.
+ if (
+ $packed_lines <= 2 # probably can fit in 2 lines
+ && $item_count < 9 # doesn't have too many items
+ && $opening_environment eq 'BLOCK' # not a sub-container
+ && $opening_token eq '(' # is paren list
+ )
+ {
- # and let keywords follow a closing 'do' brace
- && $last_nonblank_block_type ne 'do'
+ # Shortcut method 1: for -lp and just one comma:
+ # This is a no-brainer, just break at the comma.
+ if (
+ $rOpts_line_up_parentheses # -lp
+ && $item_count == 2 # two items, one comma
+ && !$must_break_open
+ )
+ {
+ my $i_break = $rcomma_index->[0];
+ $self->set_forced_breakpoint($i_break);
+ ${$rdo_not_break_apart} = 1;
+ return;
- && (
- $is_long_line
+ }
- # or container is broken (by side-comment, etc)
- || ( $next_nonblank_token eq '('
- && $mate_index_to_go[$i_next_nonblank] < $i )
- )
+ # method 2 is for most small ragged lists which might look
+ # best if not displayed as a table.
+ if (
+ ( $number_of_fields == 2 && $item_count == 3 )
+ || (
+ $new_identifier_count > 0 # isn't all quotes
+ && $sparsity > 0.15
+ ) # would be fairly spaced gaps if aligned
)
{
- $self->set_forced_breakpoint( $i - 1 );
- } ## end if ( $type eq 'k' && $i...)
-
- # remember locations of -> if this is a pre-broken method chain
- if ( $type eq '->' ) {
- if ($rOpts_break_at_old_method_breakpoints) {
- # Case 1: look for lines with leading pointers
- if ( $i == $i_line_start ) {
- $self->set_forced_breakpoint( $i - 1 );
- }
+ my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
+ $ri_ragged_break_list );
+ ++$break_count if ($use_separate_first_term);
- # Case 2: look for cuddled pointer calls
- else {
+ # NOTE: we should really use the true break count here,
+ # which can be greater if there are large terms and
+ # little space, but usually this will work well enough.
+ unless ($must_break_open) {
- # look for old lines with leading ')->' or ') ->'
- # and, when found, force a break before the
- # opening paren and after the previous closing paren.
- if (
- $types_to_go[$i_line_start] eq '}'
- && ( $i == $i_line_start + 1
- || $i == $i_line_start + 2
- && $types_to_go[ $i - 1 ] eq 'b' )
- )
- {
- $self->set_forced_breakpoint( $i_line_start - 1 );
- $self->set_forced_breakpoint(
- $mate_index_to_go[$i_line_start] );
- }
+ if ( $break_count <= 1 ) {
+ ${$rdo_not_break_apart} = 1;
+ }
+ elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
+ {
+ ${$rdo_not_break_apart} = 1;
}
}
- } ## end if ( $type eq '->' )
-
- elsif ( $type eq ';' ) {
- if ( $i == $i_line_start
- && $rOpts_break_at_old_semicolon_breakpoints )
- {
- $self->set_forced_breakpoint( $i - 1 );
- }
+ return;
}
- # remember locations of '||' and '&&' for possible breaks if we
- # decide this is a long logical expression.
- elsif ( $type eq '||' ) {
- push @{ $rand_or_list[$depth][2] }, $i;
- ++$has_old_logical_breakpoints[$depth]
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints );
- } ## end elsif ( $type eq '||' )
- elsif ( $type eq '&&' ) {
- push @{ $rand_or_list[$depth][3] }, $i;
- ++$has_old_logical_breakpoints[$depth]
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints );
- } ## end elsif ( $type eq '&&' )
- elsif ( $type eq 'f' ) {
- push @{ $rfor_semicolon_list[$depth] }, $i;
- }
- elsif ( $type eq 'k' ) {
- if ( $token eq 'and' ) {
- push @{ $rand_or_list[$depth][1] }, $i;
- ++$has_old_logical_breakpoints[$depth]
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints );
- } ## end if ( $token eq 'and' )
+ } # end shortcut methods
- # break immediately at 'or's which are probably not in a logical
- # block -- but we will break in logical breaks below so that
- # they do not add to the forced_breakpoint_count
- elsif ( $token eq 'or' ) {
- push @{ $rand_or_list[$depth][0] }, $i;
- ++$has_old_logical_breakpoints[$depth]
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints );
- if ( $is_logical_container{ $container_type[$depth] } ) {
- }
- else {
- if ($is_long_line) { $self->set_forced_breakpoint($i) }
- elsif ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints )
- {
- $saw_good_breakpoint = 1;
- }
- } ## end else [ if ( $is_logical_container...)]
- } ## end elsif ( $token eq 'or' )
- elsif ( $token eq 'if' || $token eq 'unless' ) {
- push @{ $rand_or_list[$depth][4] }, $i;
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints )
- {
- $self->set_forced_breakpoint($i);
- }
- } ## end elsif ( $token eq 'if' ||...)
- } ## end elsif ( $type eq 'k' )
- elsif ( $is_assignment{$type} ) {
- $i_equals[$depth] = $i;
- }
+ # debug stuff
+ $DEBUG_SPARSE && do {
+ print STDOUT
+"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
- if ($type_sequence) {
+ };
- # handle any postponed closing breakpoints
- if ( $token =~ /^[\)\]\}\:]$/ ) {
- if ( $type eq ':' ) {
- $last_colon_sequence_number = $type_sequence;
+ #---------------------------------------------------------------
+ # Compound List Rule 2:
+ # If this list is too long for one line, and it is an item of a
+ # larger list, then we must format it, regardless of sparsity
+ # (ian.t). One reason that we have to do this is to trigger
+ # Compound List Rule 1, above, which causes breaks at all commas of
+ # all outer lists. In this way, the structure will be properly
+ # displayed.
+ #---------------------------------------------------------------
- # retain break at a ':' line break
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_ternary_breakpoints )
- {
+ # Decide if this list is too long for one line unless broken
+ my $total_columns = table_columns_available($i_opening_paren);
+ my $too_long = $packed_columns > $total_columns;
- $self->set_forced_breakpoint($i);
+ # For a paren list, include the length of the token just before the
+ # '(' because this is likely a sub call, and we would have to
+ # include the sub name on the same line as the list. This is still
+ # imprecise, but not too bad. (steve.t)
+ if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
- # break at previous '='
- if ( $i_equals[$depth] > 0 ) {
- $self->set_forced_breakpoint(
- $i_equals[$depth] );
- $i_equals[$depth] = -1;
- }
- } ## end if ( ( $i == $i_line_start...))
- } ## end if ( $type eq ':' )
- if ( has_postponed_breakpoint($type_sequence) ) {
- my $inc = ( $type eq ':' ) ? 0 : 1;
- $self->set_forced_breakpoint( $i - $inc );
- }
- } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(])
+ $too_long = $self->excess_line_length( $i_opening_minus,
+ $i_effective_last_comma + 1 ) > 0;
+ }
- # set breaks at ?/: if they will get separated (and are
- # not a ?/: chain), or if the '?' is at the end of the
- # line
- elsif ( $token eq '?' ) {
- my $i_colon = $mate_index_to_go[$i];
- if (
- $i_colon <= 0 # the ':' is not in this batch
- || $i == 0 # this '?' is the first token of the line
- || $i ==
- $max_index_to_go # or this '?' is the last token
- )
- {
+ # FIXME: For an item after a '=>', try to include the length of the
+ # thing before the '=>'. This is crude and should be improved by
+ # actually looking back token by token.
+ if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
+ my $i_opening_minus = $i_opening_paren - 4;
+ if ( $i_opening_minus >= 0 ) {
+ $too_long = $self->excess_line_length( $i_opening_minus,
+ $i_effective_last_comma + 1 ) > 0;
+ }
+ }
- # don't break at a '?' if preceded by ':' on
- # this line of previous ?/: pair on this line.
- # This is an attempt to preserve a chain of ?/:
- # expressions (elsif2.t). And don't break if
- # this has a side comment.
- $self->set_forced_breakpoint($i)
- unless (
- $type_sequence == (
- $last_colon_sequence_number +
- TYPE_SEQUENCE_INCREMENT
- )
- || $tokens_to_go[$max_index_to_go] eq '#'
- );
- $self->set_closing_breakpoint($i);
- } ## end if ( $i_colon <= 0 ||...)
- } ## end elsif ( $token eq '?' )
- } ## end if ($type_sequence)
+ # Always break lists contained in '[' and '{' if too long for 1 line,
+ # and always break lists which are too long and part of a more complex
+ # structure.
+ my $must_break_open_container = $must_break_open
+ || ( $too_long
+ && ( $in_hierarchical_list || $opening_token ne '(' ) );
-#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
+#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
+
+ #---------------------------------------------------------------
+ # The main decision:
+ # Now decide if we will align the data into aligned columns. Do not
+ # attempt to align columns if this is a tiny table or it would be
+ # too spaced. It seems that the more packed lines we have, the
+ # sparser the list that can be allowed and still look ok.
+ #---------------------------------------------------------------
+
+ if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
+ || ( $formatted_lines < 2 )
+ || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
+ )
+ {
+
+ #---------------------------------------------------------------
+ # too sparse: would look ugly if aligned in a table;
+ #---------------------------------------------------------------
+
+ # use old breakpoints if this is a 'big' list
+ # FIXME: See if this is still necessary. sub sweep_left_to_right
+ # now fixes a lot of problems.
+ if ( $packed_lines > 2 && $item_count > 10 ) {
+ write_logfile_entry("List sparse: using old breakpoints\n");
+ $self->copy_old_breakpoints( $i_first_comma, $i_last_comma );
+ }
- #------------------------------------------------------------
- # Handle Increasing Depth..
- #
- # prepare for a new list when depth increases
- # token $i is a '(','{', or '['
- #------------------------------------------------------------
- if ( $depth > $current_depth ) {
+ # let the continuation logic handle it if 2 lines
+ else {
- $breakpoint_stack[$depth] = get_forced_breakpoint_count();
- $breakpoint_undo_stack[$depth] =
- get_forced_breakpoint_undo_count();
- $has_broken_sublist[$depth] = 0;
- $identifier_count_stack[$depth] = 0;
- $index_before_arrow[$depth] = -1;
- $interrupted_list[$depth] = 0;
- $item_count_stack[$depth] = 0;
- $last_comma_index[$depth] = undef;
- $last_dot_index[$depth] = undef;
- $last_nonblank_type[$depth] = $last_nonblank_type;
- $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
- $opening_structure_index_stack[$depth] = $i;
- $rand_or_list[$depth] = [];
- $rfor_semicolon_list[$depth] = [];
- $i_equals[$depth] = -1;
- $want_comma_break[$depth] = 0;
- $container_type[$depth] =
- ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
- ? $last_nonblank_token
- : "";
- $has_old_logical_breakpoints[$depth] = 0;
+ my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
+ $ri_ragged_break_list );
+ ++$break_count if ($use_separate_first_term);
- # if line ends here then signal closing token to break
- if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
- {
- $self->set_closing_breakpoint($i);
+ unless ($must_break_open_container) {
+ if ( $break_count <= 1 ) {
+ ${$rdo_not_break_apart} = 1;
+ }
+ elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
+ {
+ ${$rdo_not_break_apart} = 1;
+ }
}
+ }
+ return;
+ }
- # Not all lists of values should be vertically aligned..
- $dont_align[$depth] =
+ #---------------------------------------------------------------
+ # go ahead and format as a table
+ #---------------------------------------------------------------
+ write_logfile_entry(
+ "List: auto formatting with $number_of_fields fields/row\n");
- # code BLOCKS are handled at a higher level
- ( $block_type ne "" )
+ my $j_first_break =
+ $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
- # certain paren lists
- || ( $type eq '(' ) && (
+ for (
+ my $j = $j_first_break ;
+ $j < $comma_count ;
+ $j += $number_of_fields
+ )
+ {
+ my $i = $rcomma_index->[$j];
+ $self->set_forced_breakpoint($i);
+ }
+ return;
+ }
+} ## end closure set_comma_breakpoints_do
- # it does not usually look good to align a list of
- # identifiers in a parameter list, as in:
- # my($var1, $var2, ...)
- # (This test should probably be refined, for now I'm just
- # testing for any keyword)
- ( $last_nonblank_type eq 'k' )
+sub study_list_complexity {
- # a trailing '(' usually indicates a non-list
- || ( $next_nonblank_type eq '(' )
- );
+ # Look for complex tables which should be formatted with one term per line.
+ # Returns the following:
+ #
+ # \@i_ragged_break_list = list of good breakpoints to avoid lines
+ # which are hard to read
+ # $number_of_fields_best = suggested number of fields based on
+ # complexity; = 0 if any number may be used.
+ #
+ my ( $self, $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
+ my $item_count = @{$ri_term_begin};
+ my $complex_item_count = 0;
+ my $number_of_fields_best = $rOpts->{'maximum-fields-per-table'};
+ my $i_max = @{$ritem_lengths} - 1;
+ ##my @item_complexity;
- # patch to outdent opening brace of long if/for/..
- # statements (like this one). See similar coding in
- # set_continuation breaks. We have also catch it here for
- # short line fragments which otherwise will not go through
- # set_continuation_breaks.
- if (
- $block_type
+ my $i_last_last_break = -3;
+ my $i_last_break = -2;
+ my @i_ragged_break_list;
- # if we have the ')' but not its '(' in this batch..
- && ( $last_nonblank_token eq ')' )
- && $mate_index_to_go[$i_last_nonblank_token] < 0
+ my $definitely_complex = 30;
+ my $definitely_simple = 12;
+ my $quote_count = 0;
- # and user wants brace to left
- && !$rOpts->{'opening-brace-always-on-right'}
+ for my $i ( 0 .. $i_max ) {
+ my $ib = $ri_term_begin->[$i];
+ my $ie = $ri_term_end->[$i];
- && ( $type eq '{' ) # should be true
- && ( $token eq '{' ) # should be true
- )
- {
- $self->set_forced_breakpoint( $i - 1 );
- } ## end if ( $block_type && ( ...))
- } ## end if ( $depth > $current_depth)
+ # define complexity: start with the actual term length
+ my $weighted_length = ( $ritem_lengths->[$i] - 2 );
- #------------------------------------------------------------
- # Handle Decreasing Depth..
- #
- # finish off any old list when depth decreases
- # token $i is a ')','}', or ']'
- #------------------------------------------------------------
- elsif ( $depth < $current_depth ) {
+ ##TBD: join types here and check for variations
+ ##my $str=join "", @tokens_to_go[$ib..$ie];
- check_for_new_minimum_depth($depth);
+ my $is_quote = 0;
+ if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
+ $is_quote = 1;
+ $quote_count++;
+ }
+ elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
+ $quote_count++;
+ }
- # force all outer logical containers to break after we see on
- # old breakpoint
- $has_old_logical_breakpoints[$depth] ||=
- $has_old_logical_breakpoints[$current_depth];
+ if ( $ib eq $ie ) {
+ if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
+ $complex_item_count++;
+ $weighted_length *= 2;
+ }
+ else {
+ }
+ }
+ else {
+ if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
+ $complex_item_count++;
+ $weighted_length *= 2;
+ }
+ if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
+ $weighted_length += 4;
+ }
+ }
- # Patch to break between ') {' if the paren list is broken.
- # There is similar logic in set_continuation_breaks for
- # non-broken lists.
- if ( $token eq ')'
- && $next_nonblank_block_type
- && $interrupted_list[$current_depth]
- && $next_nonblank_type eq '{'
- && !$rOpts->{'opening-brace-always-on-right'} )
- {
- $self->set_forced_breakpoint($i);
- } ## end if ( $token eq ')' && ...
+ # add weight for extra tokens.
+ $weighted_length += 2 * ( $ie - $ib );
-#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
+## my $BUB = join '', @tokens_to_go[$ib..$ie];
+## print "# COMPLEXITY:$weighted_length $BUB\n";
- # set breaks at commas if necessary
- my ( $bp_count, $do_not_break_apart ) =
- $self->set_comma_breakpoints($current_depth);
+##push @item_complexity, $weighted_length;
- my $i_opening = $opening_structure_index_stack[$current_depth];
- my $saw_opening_structure = ( $i_opening >= 0 );
+ # now mark a ragged break after this item it if it is 'long and
+ # complex':
+ if ( $weighted_length >= $definitely_complex ) {
- # this term is long if we had to break at interior commas..
- my $is_long_term = $bp_count > 0;
+ # if we broke after the previous term
+ # then break before it too
+ if ( $i_last_break == $i - 1
+ && $i > 1
+ && $i_last_last_break != $i - 2 )
+ {
- # If this is a short container with one or more comma arrows,
- # then we will mark it as a long term to open it if requested.
- # $rOpts_comma_arrow_breakpoints =
- # 0 - open only if comma precedes closing brace
- # 1 - stable: except for one line blocks
- # 2 - try to form 1 line blocks
- # 3 - ignore =>
- # 4 - always open up if vt=0
- # 5 - stable: even for one line blocks if vt=0
- if ( !$is_long_term
- && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/
- && $index_before_arrow[ $depth + 1 ] > 0
- && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
- )
- {
- $is_long_term = $rOpts_comma_arrow_breakpoints == 4
- || ( $rOpts_comma_arrow_breakpoints == 0
- && $last_nonblank_token eq ',' )
- || ( $rOpts_comma_arrow_breakpoints == 5
- && $old_breakpoint_to_go[$i_opening] );
- } ## end if ( !$is_long_term &&...)
+ ## FIXME: don't strand a small term
+ pop @i_ragged_break_list;
+ push @i_ragged_break_list, $i - 2;
+ push @i_ragged_break_list, $i - 1;
+ }
- # mark term as long if the length between opening and closing
- # parens exceeds allowed line length
- if ( !$is_long_term && $saw_opening_structure ) {
- my $i_opening_minus =
- $self->find_token_starting_list($i_opening);
+ push @i_ragged_break_list, $i;
+ $i_last_last_break = $i_last_break;
+ $i_last_break = $i;
+ }
+
+ # don't break before a small last term -- it will
+ # not look good on a line by itself.
+ elsif ($i == $i_max
+ && $i_last_break == $i - 1
+ && $weighted_length <= $definitely_simple )
+ {
+ pop @i_ragged_break_list;
+ }
+ }
+
+ my $identifier_count = $i_max + 1 - $quote_count;
+
+ # Need more tuning here..
+ if ( $max_width > 12
+ && $complex_item_count > $item_count / 2
+ && $number_of_fields_best != 2 )
+ {
+ $number_of_fields_best = 1;
+ }
- # Note: we have to allow for one extra space after a
- # closing token so that we do not strand a comma or
- # semicolon, hence the '>=' here (oneline.t)
- # Note: we ignore left weld lengths here for best results
- $is_long_term =
- $self->excess_line_length( $i_opening_minus, $i, 1 ) >= 0;
- } ## end if ( !$is_long_term &&...)
+ return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
+}
- # We've set breaks after all comma-arrows. Now we have to
- # undo them if this can be a one-line block
- # (the only breakpoints set will be due to comma-arrows)
- if (
+sub get_maximum_fields_wanted {
- # user doesn't require breaking after all comma-arrows
- ( $rOpts_comma_arrow_breakpoints != 0 )
- && ( $rOpts_comma_arrow_breakpoints != 4 )
+ # Not all tables look good with more than one field of items.
+ # This routine looks at a table and decides if it should be
+ # formatted with just one field or not.
+ # This coding is still under development.
+ my ($ritem_lengths) = @_;
- # and if the opening structure is in this batch
- && $saw_opening_structure
+ my $number_of_fields_best = 0;
- # and either on the same old line
- && (
- $old_breakpoint_count_stack[$current_depth] ==
- $last_old_breakpoint_count
+ # For just a few items, we tentatively assume just 1 field.
+ my $item_count = @{$ritem_lengths};
+ if ( $item_count <= 5 ) {
+ $number_of_fields_best = 1;
+ }
- # or user wants to form long blocks with arrows
- || $rOpts_comma_arrow_breakpoints == 2
- )
+ # For larger tables, look at it both ways and see what looks best
+ else {
- # and we made some breakpoints between the opening and closing
- && ( $breakpoint_undo_stack[$current_depth] <
- get_forced_breakpoint_undo_count() )
+ my $is_odd = 1;
+ my @max_length = ( 0, 0 );
+ my @last_length_2 = ( undef, undef );
+ my @first_length_2 = ( undef, undef );
+ my $last_length = undef;
+ my $total_variation_1 = 0;
+ my $total_variation_2 = 0;
+ my @total_variation_2 = ( 0, 0 );
- # and this block is short enough to fit on one line
- # Note: use < because need 1 more space for possible comma
- && !$is_long_term
+ foreach my $j ( 0 .. $item_count - 1 ) {
- )
- {
- $self->undo_forced_breakpoint_stack(
- $breakpoint_undo_stack[$current_depth] );
- } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
+ $is_odd = 1 - $is_odd;
+ my $length = $ritem_lengths->[$j];
+ if ( $length > $max_length[$is_odd] ) {
+ $max_length[$is_odd] = $length;
+ }
- # now see if we have any comma breakpoints left
- my $has_comma_breakpoints =
- ( $breakpoint_stack[$current_depth] !=
- get_forced_breakpoint_count() );
+ if ( defined($last_length) ) {
+ my $dl = abs( $length - $last_length );
+ $total_variation_1 += $dl;
+ }
+ $last_length = $length;
- # update broken-sublist flag of the outer container
- $has_broken_sublist[$depth] =
- $has_broken_sublist[$depth]
- || $has_broken_sublist[$current_depth]
- || $is_long_term
- || $has_comma_breakpoints;
+ my $ll = $last_length_2[$is_odd];
+ if ( defined($ll) ) {
+ my $dl = abs( $length - $ll );
+ $total_variation_2[$is_odd] += $dl;
+ }
+ else {
+ $first_length_2[$is_odd] = $length;
+ }
+ $last_length_2[$is_odd] = $length;
+ }
+ $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
-# Having come to the closing ')', '}', or ']', now we have to decide if we
-# should 'open up' the structure by placing breaks at the opening and
-# closing containers. This is a tricky decision. Here are some of the
-# basic considerations:
-#
-# -If this is a BLOCK container, then any breakpoints will have already
-# been set (and according to user preferences), so we need do nothing here.
-#
-# -If we have a comma-separated list for which we can align the list items,
-# then we need to do so because otherwise the vertical aligner cannot
-# currently do the alignment.
-#
-# -If this container does itself contain a container which has been broken
-# open, then it should be broken open to properly show the structure.
-#
-# -If there is nothing to align, and no other reason to break apart,
-# then do not do it.
-#
-# We will not break open the parens of a long but 'simple' logical expression.
-# For example:
-#
-# This is an example of a simple logical expression and its formatting:
-#
-# if ( $bigwasteofspace1 && $bigwasteofspace2
-# || $bigwasteofspace3 && $bigwasteofspace4 )
-#
-# Most people would prefer this than the 'spacey' version:
-#
-# if (
-# $bigwasteofspace1 && $bigwasteofspace2
-# || $bigwasteofspace3 && $bigwasteofspace4
-# )
-#
-# To illustrate the rules for breaking logical expressions, consider:
-#
-# FULLY DENSE:
-# if ( $opt_excl
-# and ( exists $ids_excl_uc{$id_uc}
-# or grep $id_uc =~ /$_/, @ids_excl_uc ))
-#
-# This is on the verge of being difficult to read. The current default is to
-# open it up like this:
-#
-# DEFAULT:
-# if (
-# $opt_excl
-# and ( exists $ids_excl_uc{$id_uc}
-# or grep $id_uc =~ /$_/, @ids_excl_uc )
-# )
-#
-# This is a compromise which tries to avoid being too dense and to spacey.
-# A more spaced version would be:
-#
-# SPACEY:
-# if (
-# $opt_excl
-# and (
-# exists $ids_excl_uc{$id_uc}
-# or grep $id_uc =~ /$_/, @ids_excl_uc
-# )
-# )
-#
-# Some people might prefer the spacey version -- an option could be added. The
-# innermost expression contains a long block '( exists $ids_... ')'.
-#
-# Here is how the logic goes: We will force a break at the 'or' that the
-# innermost expression contains, but we will not break apart its opening and
-# closing containers because (1) it contains no multi-line sub-containers itself,
-# and (2) there is no alignment to be gained by breaking it open like this
-#
-# and (
-# exists $ids_excl_uc{$id_uc}
-# or grep $id_uc =~ /$_/, @ids_excl_uc
-# )
-#
-# (although this looks perfectly ok and might be good for long expressions). The
-# outer 'if' container, though, contains a broken sub-container, so it will be
-# broken open to avoid too much density. Also, since it contains no 'or's, there
-# will be a forced break at its 'and'.
+ my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
+ unless ( $total_variation_2 < $factor * $total_variation_1 ) {
+ $number_of_fields_best = 1;
+ }
+ }
+ return ($number_of_fields_best);
+}
- # set some flags telling something about this container..
- my $is_simple_logical_expression = 0;
- if ( $item_count_stack[$current_depth] == 0
- && $saw_opening_structure
- && $tokens_to_go[$i_opening] eq '('
- && $is_logical_container{ $container_type[$current_depth] }
- )
- {
+sub table_columns_available {
+ my $i_first_comma = shift;
+ my $columns =
+ maximum_line_length($i_first_comma) -
+ leading_spaces_to_go($i_first_comma);
- # This seems to be a simple logical expression with
- # no existing breakpoints. Set a flag to prevent
- # opening it up.
- if ( !$has_comma_breakpoints ) {
- $is_simple_logical_expression = 1;
- }
+ # Patch: the vertical formatter does not line up lines whose lengths
+ # exactly equal the available line length because of allowances
+ # that must be made for side comments. Therefore, the number of
+ # available columns is reduced by 1 character.
+ $columns -= 1;
+ return $columns;
+}
- # This seems to be a simple logical expression with
- # breakpoints (broken sublists, for example). Break
- # at all 'or's and '||'s.
- else {
- $self->set_logical_breakpoints($current_depth);
- }
- } ## end if ( $item_count_stack...)
+sub maximum_number_of_fields {
- if ( $is_long_term
- && @{ $rfor_semicolon_list[$current_depth] } )
- {
- $self->set_for_semicolon_breakpoints($current_depth);
+ # how many fields will fit in the available space?
+ my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
+ my $max_pairs = int( $columns / $pair_width );
+ my $number_of_fields = $max_pairs * 2;
+ if ( $odd_or_even == 1
+ && $max_pairs * $pair_width + $max_width <= $columns )
+ {
+ $number_of_fields++;
+ }
+ return $number_of_fields;
+}
- # open up a long 'for' or 'foreach' container to allow
- # leading term alignment unless -lp is used.
- $has_comma_breakpoints = 1
- unless $rOpts_line_up_parentheses;
- } ## end if ( $is_long_term && ...)
+sub compactify_table {
- if (
+ # given a table with a certain number of fields and a certain number
+ # of lines, see if reducing the number of fields will make it look
+ # better.
+ my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
+ if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
+ my $min_fields;
- # breaks for code BLOCKS are handled at a higher level
- !$block_type
+ for (
+ $min_fields = $number_of_fields ;
+ $min_fields >= $odd_or_even
+ && $min_fields * $formatted_lines >= $item_count ;
+ $min_fields -= $odd_or_even
+ )
+ {
+ $number_of_fields = $min_fields;
+ }
+ }
+ return $number_of_fields;
+}
- # we do not need to break at the top level of an 'if'
- # type expression
- && !$is_simple_logical_expression
+sub set_ragged_breakpoints {
- ## modification to keep ': (' containers vertically tight;
- ## but probably better to let user set -vt=1 to avoid
- ## inconsistency with other paren types
- ## && ($container_type[$current_depth] ne ':')
+ # Set breakpoints in a list that cannot be formatted nicely as a
+ # table.
+ my ( $self, $ri_term_comma, $ri_ragged_break_list ) = @_;
- # otherwise, we require one of these reasons for breaking:
- && (
+ my $break_count = 0;
+ foreach ( @{$ri_ragged_break_list} ) {
+ my $j = $ri_term_comma->[$_];
+ if ($j) {
+ $self->set_forced_breakpoint($j);
+ $break_count++;
+ }
+ }
+ return $break_count;
+}
- # - this term has forced line breaks
- $has_comma_breakpoints
+sub copy_old_breakpoints {
+ my ( $self, $i_first_comma, $i_last_comma ) = @_;
+ for my $i ( $i_first_comma .. $i_last_comma ) {
+ if ( $old_breakpoint_to_go[$i] ) {
+ $self->set_forced_breakpoint($i);
+ }
+ }
+ return;
+}
- # - the opening container is separated from this batch
- # for some reason (comment, blank line, code block)
- # - this is a non-paren container spanning multiple lines
- || !$saw_opening_structure
+sub set_nobreaks {
+ my ( $self, $i, $j ) = @_;
+ if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
- # - this is a long block contained in another breakable
- # container
- || ( $is_long_term
- && $container_environment_to_go[$i_opening] ne
- 'BLOCK' )
- )
- )
- {
+ 0 && do {
+ my ( $a, $b, $c ) = caller();
+ my $forced_breakpoint_count = get_forced_breakpoint_count();
+ print STDOUT
+"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
+ };
- # For -lp option, we must put a breakpoint before
- # the token which has been identified as starting
- # this indentation level. This is necessary for
- # proper alignment.
- if ( $rOpts_line_up_parentheses && $saw_opening_structure )
- {
- my $item = $leading_spaces_to_go[ $i_opening + 1 ];
- if ( $i_opening + 1 < $max_index_to_go
- && $types_to_go[ $i_opening + 1 ] eq 'b' )
- {
- $item = $leading_spaces_to_go[ $i_opening + 2 ];
- }
- if ( defined($item) ) {
- my $i_start_2;
- my $K_start_2 = $item->get_starting_index_K();
- if ( defined($K_start_2) ) {
- $i_start_2 = $K_start_2 - $K_to_go[0];
- }
- if (
- defined($i_start_2)
+ @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
+ }
- # we are breaking after an opening brace, paren,
- # so don't break before it too
- && $i_start_2 ne $i_opening
- && $i_start_2 >= 0
- && $i_start_2 <= $max_index_to_go
- )
- {
+ # shouldn't happen; non-critical error
+ else {
+ 0 && do {
+ my ( $a, $b, $c ) = caller();
+ print STDOUT
+ "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
+ };
+ }
+ return;
+}
- # Only break for breakpoints at the same
- # indentation level as the opening paren
- my $test1 = $nesting_depth_to_go[$i_opening];
- my $test2 = $nesting_depth_to_go[$i_start_2];
- if ( $test2 == $test1 ) {
- $self->set_forced_breakpoint(
- $i_start_2 - 1 );
- }
- } ## end if ( defined($i_start_2...))
- } ## end if ( defined($item) )
- } ## end if ( $rOpts_line_up_parentheses...)
+###############################################
+# CODE SECTION 12: Code for setting indentation
+###############################################
- # break after opening structure.
- # note: break before closing structure will be automatic
- if ( $minimum_depth <= $current_depth ) {
+sub total_line_length {
- $self->set_forced_breakpoint($i_opening)
- unless ( $do_not_break_apart
- || is_unbreakable_container($current_depth) );
+ # return length of a line of tokens ($ibeg .. $iend)
+ my ( $ibeg, $iend ) = @_;
+ return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
+}
- # break at ',' of lower depth level before opening token
- if ( $last_comma_index[$depth] ) {
- $self->set_forced_breakpoint(
- $last_comma_index[$depth] );
- }
+sub maximum_line_length_for_level {
- # break at '.' of lower depth level before opening token
- if ( $last_dot_index[$depth] ) {
- $self->set_forced_breakpoint(
- $last_dot_index[$depth] );
- }
+ # return maximum line length for line starting with a given level
+ my $maximum_line_length = $rOpts_maximum_line_length;
- # break before opening structure if preceded by another
- # closing structure and a comma. This is normally
- # done by the previous closing brace, but not
- # if it was a one-line block.
- if ( $i_opening > 2 ) {
- my $i_prev =
- ( $types_to_go[ $i_opening - 1 ] eq 'b' )
- ? $i_opening - 2
- : $i_opening - 1;
+ # Modify if -vmll option is selected
+ if ($rOpts_variable_maximum_line_length) {
+ my $level = shift;
+ if ( $level < 0 ) { $level = 0 }
+ $maximum_line_length += $level * $rOpts_indent_columns;
+ }
+ return $maximum_line_length;
+}
- if ( $types_to_go[$i_prev] eq ','
- && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
- {
- $self->set_forced_breakpoint($i_prev);
- }
+sub excess_line_length {
- # also break before something like ':(' or '?('
- # if appropriate.
- elsif (
- $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
- {
- my $token_prev = $tokens_to_go[$i_prev];
- if ( $want_break_before{$token_prev} ) {
- $self->set_forced_breakpoint($i_prev);
- }
- } ## end elsif ( $types_to_go[$i_prev...])
- } ## end if ( $i_opening > 2 )
- } ## end if ( $minimum_depth <=...)
+ # return number of characters by which a line of tokens ($ibeg..$iend)
+ # exceeds the allowable line length.
+ my ( $self, $ibeg, $iend, $ignore_left_weld, $ignore_right_weld ) = @_;
- # break after comma following closing structure
- if ( $next_type eq ',' ) {
- $self->set_forced_breakpoint( $i + 1 );
- }
+ # Include left and right weld lengths unless requested not to
+ my $wl = $ignore_left_weld ? 0 : $self->weld_len_left_to_go($iend);
+ my $wr = $ignore_right_weld ? 0 : $self->weld_len_right_to_go($iend);
- # break before an '=' following closing structure
- if (
- $is_assignment{$next_nonblank_type}
- && ( $breakpoint_stack[$current_depth] !=
- get_forced_breakpoint_count() )
- )
- {
- $self->set_forced_breakpoint($i);
- } ## end if ( $is_assignment{$next_nonblank_type...})
+ return total_line_length( $ibeg, $iend ) + $wl + $wr -
+ maximum_line_length($ibeg);
+}
- # break at any comma before the opening structure Added
- # for -lp, but seems to be good in general. It isn't
- # obvious how far back to look; the '5' below seems to
- # work well and will catch the comma in something like
- # push @list, myfunc( $param, $param, ..
+sub get_spaces {
- my $icomma = $last_comma_index[$depth];
- if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
- unless ( $forced_breakpoint_to_go[$icomma] ) {
- $self->set_forced_breakpoint($icomma);
- }
- }
- } # end logic to open up a container
+ # return the number of leading spaces associated with an indentation
+ # variable $indentation is either a constant number of spaces or an object
+ # with a get_spaces method.
+ my $indentation = shift;
+ return ref($indentation) ? $indentation->get_spaces() : $indentation;
+}
- # Break open a logical container open if it was already open
- elsif ($is_simple_logical_expression
- && $has_old_logical_breakpoints[$current_depth] )
- {
- $self->set_logical_breakpoints($current_depth);
- }
+sub get_recoverable_spaces {
- # Handle long container which does not get opened up
- elsif ($is_long_term) {
+ # return the number of spaces (+ means shift right, - means shift left)
+ # that we would like to shift a group of lines with the same indentation
+ # to get them to line up with their opening parens
+ my $indentation = shift;
+ return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
+}
- # must set fake breakpoint to alert outer containers that
- # they are complex
- set_fake_breakpoint();
- } ## end elsif ($is_long_term)
+sub get_available_spaces_to_go {
- } ## end elsif ( $depth < $current_depth)
+ my ( $self, $ii ) = @_;
+ my $item = $leading_spaces_to_go[$ii];
- #------------------------------------------------------------
- # Handle this token
- #------------------------------------------------------------
+ # return the number of available leading spaces associated with an
+ # indentation variable. $indentation is either a constant number of
+ # spaces or an object with a get_available_spaces method.
+ return ref($item) ? $item->get_available_spaces() : 0;
+}
- $current_depth = $depth;
+{ ## begin closure set_leading_whitespace (for -lp indentation)
- # handle comma-arrow
- if ( $type eq '=>' ) {
- next if ( $last_nonblank_type eq '=>' );
- next if $rOpts_break_at_old_comma_breakpoints;
- next if $rOpts_comma_arrow_breakpoints == 3;
- $want_comma_break[$depth] = 1;
- $index_before_arrow[$depth] = $i_last_nonblank_token;
- next;
- } ## end if ( $type eq '=>' )
+ # These routines are called batch-by-batch to handle the -lp indentation
+ # option. The coding is rather complex, but is only for -lp.
- elsif ( $type eq '.' ) {
- $last_dot_index[$depth] = $i;
- }
+ my $gnu_position_predictor;
+ my $gnu_sequence_number;
+ my $line_start_index_to_go;
+ my $max_gnu_item_index;
+ my $max_gnu_stack_index;
+ my %gnu_arrow_count;
+ my %gnu_comma_count;
+ my %last_gnu_equals;
+ my @gnu_item_list;
+ my @gnu_stack;
- # Turn off alignment if we are sure that this is not a list
- # environment. To be safe, we will do this if we see certain
- # non-list tokens, such as ';', and also the environment is
- # not a list. Note that '=' could be in any of the = operators
- # (lextest.t). We can't just use the reported environment
- # because it can be incorrect in some cases.
- elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
- && $container_environment_to_go[$i] ne 'LIST' )
- {
- $dont_align[$depth] = 1;
- $want_comma_break[$depth] = 0;
- $index_before_arrow[$depth] = -1;
- } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
+ sub initialize_gnu_vars {
- # now just handle any commas
- next unless ( $type eq ',' );
+ # initialize gnu variables for a new file;
+ # must be called once at the start of a new file.
- $last_dot_index[$depth] = undef;
- $last_comma_index[$depth] = $i;
+ # initialize the leading whitespace stack to negative levels
+ # so that we can never run off the end of the stack
+ $gnu_position_predictor =
+ 0; # where the current token is predicted to be
+ $max_gnu_stack_index = 0;
+ $max_gnu_item_index = -1;
+ $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
+ @gnu_item_list = ();
+ return;
+ }
- # break here if this comma follows a '=>'
- # but not if there is a side comment after the comma
- if ( $want_comma_break[$depth] ) {
+ sub initialize_gnu_batch_vars {
- if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
- if ($rOpts_comma_arrow_breakpoints) {
- $want_comma_break[$depth] = 0;
- next;
- }
- }
+ # initialize gnu variables for a new batch;
+ # must be called before each new batch
+ $gnu_sequence_number++; # increment output batch counter
+ %last_gnu_equals = ();
+ %gnu_comma_count = ();
+ %gnu_arrow_count = ();
+ $line_start_index_to_go = 0;
+ $max_gnu_item_index = UNDEFINED_INDEX;
+ return;
+ }
- $self->set_forced_breakpoint($i)
- unless ( $next_nonblank_type eq '#' );
+ sub new_lp_indentation_item {
- # break before the previous token if it looks safe
- # Example of something that we will not try to break before:
- # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
- # Also we don't want to break at a binary operator (like +):
- # $c->createOval(
- # $x + $R, $y +
- # $R => $x - $R,
- # $y - $R, -fill => 'black',
- # );
- my $ibreak = $index_before_arrow[$depth] - 1;
- if ( $ibreak > 0
- && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
- {
- if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
- if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
- if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
+ # this is an interface to the IndentationItem class
+ my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
- # don't break pointer calls, such as the following:
- # File::Spec->curdir => 1,
- # (This is tokenized as adjacent 'w' tokens)
- ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
+ # A negative level implies not to store the item in the item_list
+ my $index = 0;
+ if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
- # And don't break before a comma, as in the following:
- # ( LONGER_THAN,=> 1,
- # EIGHTY_CHARACTERS,=> 2,
- # CAUSES_FORMATTING,=> 3,
- # LIKE_THIS,=> 4,
- # );
- # This example is for -tso but should be general rule
- if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
- && $tokens_to_go[ $ibreak + 1 ] ne ',' )
- {
- $self->set_forced_breakpoint($ibreak);
- }
- } ## end if ( $types_to_go[$ibreak...])
- } ## end if ( $ibreak > 0 && $tokens_to_go...)
+ my $starting_index_K = 0;
+ if ( defined($line_start_index_to_go)
+ && $line_start_index_to_go >= 0
+ && $line_start_index_to_go <= $max_index_to_go )
+ {
+ $starting_index_K = $K_to_go[$line_start_index_to_go];
+ }
- $want_comma_break[$depth] = 0;
- $index_before_arrow[$depth] = -1;
+ my $item = Perl::Tidy::IndentationItem->new(
+ spaces => $spaces,
+ level => $level,
+ ci_level => $ci_level,
+ available_spaces => $available_spaces,
+ index => $index,
+ gnu_sequence_number => $gnu_sequence_number,
+ align_paren => $align_paren,
+ stack_depth => $max_gnu_stack_index,
+ starting_index_K => $starting_index_K,
+ );
- # handle list which mixes '=>'s and ','s:
- # treat any list items so far as an interrupted list
- $interrupted_list[$depth] = 1;
- next;
- } ## end if ( $want_comma_break...)
+ if ( $level >= 0 ) {
+ $gnu_item_list[$max_gnu_item_index] = $item;
+ }
- # break after all commas above starting depth
- if ( $depth < $starting_depth && !$dont_align[$depth] ) {
- $self->set_forced_breakpoint($i)
- unless ( $next_nonblank_type eq '#' );
- next;
- }
+ return $item;
+ }
- # add this comma to the list..
- my $item_count = $item_count_stack[$depth];
- if ( $item_count == 0 ) {
+ sub set_leading_whitespace {
- # but do not form a list with no opening structure
- # for example:
+ # This routine defines leading whitespace for the case of -lp formatting
+ # given: the level and continuation_level of a token,
+ # define: space count of leading string which would apply if it
+ # were the first token of a new line.
- # open INFILE_COPY, ">$input_file_copy"
- # or die ("very long message");
+ my ( $self, $Kj, $K_last_nonblank, $K_last_last_nonblank,
+ $level_abs, $ci_level, $in_continued_quote )
+ = @_;
- if ( ( $opening_structure_index_stack[$depth] < 0 )
- && $container_environment_to_go[$i] eq 'BLOCK' )
- {
- $dont_align[$depth] = 1;
- }
- } ## end if ( $item_count == 0 )
+ return unless ($rOpts_line_up_parentheses);
+ return unless ( defined($max_index_to_go) && $max_index_to_go >= 0 );
- $comma_index[$depth][$item_count] = $i;
- ++$item_count_stack[$depth];
- if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
- $identifier_count_stack[$depth]++;
- }
- } ## end while ( ++$i <= $max_index_to_go)
+ my $rbreak_container = $self->[_rbreak_container_];
+ my $rshort_nested = $self->[_rshort_nested_];
+ my $rLL = $self->[_rLL_];
- #-------------------------------------------
- # end of loop over all tokens in this batch
- #-------------------------------------------
+ # find needed previous nonblank tokens
+ my $last_nonblank_token = '';
+ my $last_nonblank_type = '';
+ my $last_nonblank_block_type = '';
- # set breaks for any unfinished lists ..
- for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
+ # and previous nonblank tokens, just in this batch:
+ my $last_nonblank_token_in_batch = '';
+ my $last_nonblank_type_in_batch = '';
+ my $last_last_nonblank_type_in_batch = '';
- $interrupted_list[$dd] = 1;
- $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
- $self->set_comma_breakpoints($dd);
- $self->set_logical_breakpoints($dd)
- if ( $has_old_logical_breakpoints[$dd] );
- $self->set_for_semicolon_breakpoints($dd);
+ if ( defined($K_last_nonblank) ) {
+ $last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
+ $last_nonblank_type = $rLL->[$K_last_nonblank]->[_TYPE_];
+ $last_nonblank_block_type =
+ $rLL->[$K_last_nonblank]->[_BLOCK_TYPE_];
- # break open container...
- my $i_opening = $opening_structure_index_stack[$dd];
- $self->set_forced_breakpoint($i_opening)
- unless (
- is_unbreakable_container($dd)
+ if ( $K_last_nonblank >= $K_to_go[0] ) {
+ $last_nonblank_token_in_batch = $last_nonblank_token;
+ $last_nonblank_type_in_batch = $last_nonblank_type;
+ if ( defined($K_last_last_nonblank)
+ && $K_last_last_nonblank > $K_to_go[0] )
+ {
+ $last_last_nonblank_type_in_batch =
+ $rLL->[$K_last_last_nonblank]->[_TYPE_];
+ }
+ }
+ }
- # Avoid a break which would place an isolated ' or "
- # on a line
- || ( $type eq 'Q'
- && $i_opening >= $max_index_to_go - 2
- && $token =~ /^['"]$/ )
- );
- } ## end for ( my $dd = $current_depth...)
+ ################################################################
- # Return a flag indicating if the input file had some good breakpoints.
- # This flag will be used to force a break in a line shorter than the
- # allowed line length.
- if ( $has_old_logical_breakpoints[$current_depth] ) {
- $saw_good_breakpoint = 1;
+ # Adjust levels if necessary to recycle whitespace:
+ my $level = $level_abs;
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ my $nK = @{$rLL};
+ my $nws = @{$radjusted_levels};
+ if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
+ $level = $radjusted_levels->[$Kj];
+ if ( $level < 0 ) { $level = 0 } # note: this should not happen
}
- # A complex line with one break at an = has a good breakpoint.
- # This is not complex ($total_depth_variation=0):
- # $res1
- # = 10;
- #
- # This is complex ($total_depth_variation=6):
- # $res2 =
- # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
- elsif ($i_old_assignment_break
- && $total_depth_variation > 4
- && $old_breakpoint_count == 1 )
- {
- $saw_good_breakpoint = 1;
- } ## end elsif ( $i_old_assignment_break...)
+ # The continued_quote flag means that this is the first token of a
+ # line, and it is the continuation of some kind of multi-line quote
+ # or pattern. It requires special treatment because it must have no
+ # added leading whitespace. So we create a special indentation item
+ # which is not in the stack.
+ if ($in_continued_quote) {
+ my $space_count = 0;
+ my $available_space = 0;
+ $level = -1; # flag to prevent storing in item_list
+ $leading_spaces_to_go[$max_index_to_go] =
+ $reduced_spaces_to_go[$max_index_to_go] =
+ new_lp_indentation_item( $space_count, $level, $ci_level,
+ $available_space, 0 );
+ return;
+ }
- return $saw_good_breakpoint;
- } ## end sub scan_list
-} ## end closure scan_list
+ # get the top state from the stack
+ my $space_count = $gnu_stack[$max_gnu_stack_index]->get_spaces();
+ my $current_level = $gnu_stack[$max_gnu_stack_index]->get_level();
+ my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
-sub find_token_starting_list {
+ my $type = $types_to_go[$max_index_to_go];
+ my $token = $tokens_to_go[$max_index_to_go];
+ my $total_depth = $nesting_depth_to_go[$max_index_to_go];
- # When testing to see if a block will fit on one line, some
- # previous token(s) may also need to be on the line; particularly
- # if this is a sub call. So we will look back at least one
- # token. NOTE: This isn't perfect, but not critical, because
- # if we mis-identify a block, it will be wrapped and therefore
- # fixed the next time it is formatted.
- my ( $self, $i_opening_paren ) = @_;
- my $i_opening_minus = $i_opening_paren;
- my $im1 = $i_opening_paren - 1;
- my $im2 = $i_opening_paren - 2;
- my $im3 = $i_opening_paren - 3;
- my $typem1 = $types_to_go[$im1];
- my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';
+ if ( $type eq '{' || $type eq '(' ) {
- if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
- $i_opening_minus = $i_opening_paren;
- }
- elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
- $i_opening_minus = $im1 if $im1 >= 0;
+ $gnu_comma_count{ $total_depth + 1 } = 0;
+ $gnu_arrow_count{ $total_depth + 1 } = 0;
- # walk back to improve length estimate
- for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
- last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
- $i_opening_minus = $j;
- }
- if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
- }
- elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
- elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
- $i_opening_minus = $im2;
- }
- return $i_opening_minus;
-}
+ # If we come to an opening token after an '=' token of some type,
+ # see if it would be helpful to 'break' after the '=' to save space
+ my $last_equals = $last_gnu_equals{$total_depth};
+ if ( $last_equals && $last_equals > $line_start_index_to_go ) {
-{ ## begin closure set_comma_breakpoints_do
+ # find the position if we break at the '='
+ my $i_test = $last_equals;
+ if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
- my %is_keyword_with_special_leading_term;
+ # TESTING
+ ##my $too_close = ($i_test==$max_index_to_go-1);
- BEGIN {
+ my $test_position =
+ total_line_length( $i_test, $max_index_to_go );
+ my $mll = maximum_line_length($i_test);
- # These keywords have prototypes which allow a special leading item
- # followed by a list
- my @q =
- qw(formline grep kill map printf sprintf push chmod join pack unshift);
- @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
- }
+ if (
- my $DEBUG_SPARSE;
+ # the equals is not just before an open paren (testing)
+ ##!$too_close &&
- sub set_comma_breakpoints_do {
+ # if we are beyond the midpoint
+ $gnu_position_predictor >
+ $mll - $rOpts_maximum_line_length / 2
- # Given a list with some commas, set breakpoints at some of the
- # commas, if necessary, to make it easy to read.
+ # or we are beyond the 1/4 point and there was an old
+ # break at the equals
+ || (
+ $gnu_position_predictor >
+ $mll - $rOpts_maximum_line_length * 3 / 4
+ && (
+ $old_breakpoint_to_go[$last_equals]
+ || ( $last_equals > 0
+ && $old_breakpoint_to_go[ $last_equals - 1 ] )
+ || ( $last_equals > 1
+ && $types_to_go[ $last_equals - 1 ] eq 'b'
+ && $old_breakpoint_to_go[ $last_equals - 2 ] )
+ )
+ )
+ )
+ {
- my ( $self, %input_hash ) = @_;
+ # then make the switch -- note that we do not set a real
+ # breakpoint here because we may not really need one; sub
+ # scan_list will do that if necessary
+ $line_start_index_to_go = $i_test + 1;
+ $gnu_position_predictor = $test_position;
+ }
+ }
+ }
- my $depth = $input_hash{depth};
- my $i_opening_paren = $input_hash{i_opening_paren};
- my $i_closing_paren = $input_hash{i_closing_paren};
- my $item_count = $input_hash{item_count};
- my $identifier_count = $input_hash{identifier_count};
- my $rcomma_index = $input_hash{rcomma_index};
- my $next_nonblank_type = $input_hash{next_nonblank_type};
- my $list_type = $input_hash{list_type};
- my $interrupted = $input_hash{interrupted};
- my $rdo_not_break_apart = $input_hash{rdo_not_break_apart};
- my $must_break_open = $input_hash{must_break_open};
- my $has_broken_sublist = $input_hash{has_broken_sublist};
+ my $halfway =
+ maximum_line_length_for_level($level) -
+ $rOpts_maximum_line_length / 2;
- # nothing to do if no commas seen
- return if ( $item_count < 1 );
+ # Check for decreasing depth ..
+ # Note that one token may have both decreasing and then increasing
+ # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
+ # in this example we would first go back to (1,0) then up to (2,0)
+ # in a single call.
+ if ( $level < $current_level || $ci_level < $current_ci_level ) {
- my $rOpts_break_at_old_comma_breakpoints =
- $rOpts->{'break-at-old-comma-breakpoints'};
- my $rOpts_maximum_fields_per_table =
- $rOpts->{'maximum-fields-per-table'};
+ # loop to find the first entry at or completely below this level
+ my ( $lev, $ci_lev );
+ while (1) {
+ if ($max_gnu_stack_index) {
- my $i_first_comma = $rcomma_index->[0];
- my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
- my $i_last_comma = $i_true_last_comma;
- if ( $i_last_comma >= $max_index_to_go ) {
- $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
- return if ( $item_count < 1 );
- }
+ # save index of token which closes this level
+ $gnu_stack[$max_gnu_stack_index]
+ ->set_closed($max_index_to_go);
- #---------------------------------------------------------------
- # find lengths of all items in the list to calculate page layout
- #---------------------------------------------------------------
- my $comma_count = $item_count;
- my @item_lengths;
- my @i_term_begin;
- my @i_term_end;
- my @i_term_comma;
- my $i_prev_plus;
- my @max_length = ( 0, 0 );
- my $first_term_length;
- my $i = $i_opening_paren;
- my $is_odd = 1;
+ # Undo any extra indentation if we saw no commas
+ my $available_spaces =
+ $gnu_stack[$max_gnu_stack_index]->get_available_spaces();
- foreach my $j ( 0 .. $comma_count - 1 ) {
- $is_odd = 1 - $is_odd;
- $i_prev_plus = $i + 1;
- $i = $rcomma_index->[$j];
+ my $comma_count = 0;
+ my $arrow_count = 0;
+ if ( $type eq '}' || $type eq ')' ) {
+ $comma_count = $gnu_comma_count{$total_depth};
+ $arrow_count = $gnu_arrow_count{$total_depth};
+ $comma_count = 0 unless $comma_count;
+ $arrow_count = 0 unless $arrow_count;
+ }
+ $gnu_stack[$max_gnu_stack_index]
+ ->set_comma_count($comma_count);
+ $gnu_stack[$max_gnu_stack_index]
+ ->set_arrow_count($arrow_count);
- my $i_term_end =
- ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
- my $i_term_begin =
- ( $types_to_go[$i_prev_plus] eq 'b' )
- ? $i_prev_plus + 1
- : $i_prev_plus;
- push @i_term_begin, $i_term_begin;
- push @i_term_end, $i_term_end;
- push @i_term_comma, $i;
+ if ( $available_spaces > 0 ) {
- # note: currently adding 2 to all lengths (for comma and space)
- my $length =
- 2 + token_sequence_length( $i_term_begin, $i_term_end );
- push @item_lengths, $length;
+ if ( $comma_count <= 0 || $arrow_count > 0 ) {
- if ( $j == 0 ) {
- $first_term_length = $length;
- }
- else {
+ my $i =
+ $gnu_stack[$max_gnu_stack_index]->get_index();
+ my $seqno =
+ $gnu_stack[$max_gnu_stack_index]
+ ->get_sequence_number();
- if ( $length > $max_length[$is_odd] ) {
- $max_length[$is_odd] = $length;
+ # Be sure this item was created in this batch. This
+ # should be true because we delete any available
+ # space from open items at the end of each batch.
+ if ( $gnu_sequence_number != $seqno
+ || $i > $max_gnu_item_index )
+ {
+ warning(
+"Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
+ );
+ report_definite_bug();
+ }
+
+ else {
+ if ( $arrow_count == 0 ) {
+ $gnu_item_list[$i]
+ ->permanently_decrease_available_spaces(
+ $available_spaces);
+ }
+ else {
+ $gnu_item_list[$i]
+ ->tentatively_decrease_available_spaces(
+ $available_spaces);
+ }
+ foreach my $j ( $i + 1 .. $max_gnu_item_index )
+ {
+ $gnu_item_list[$j]
+ ->decrease_SPACES($available_spaces);
+ }
+ }
+ }
+ }
+
+ # go down one level
+ --$max_gnu_stack_index;
+ $lev = $gnu_stack[$max_gnu_stack_index]->get_level();
+ $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
+
+ # stop when we reach a level at or below the current level
+ if ( $lev <= $level && $ci_lev <= $ci_level ) {
+ $space_count =
+ $gnu_stack[$max_gnu_stack_index]->get_spaces();
+ $current_level = $lev;
+ $current_ci_level = $ci_lev;
+ last;
+ }
+ }
+
+ # reached bottom of stack .. should never happen because
+ # only negative levels can get here, and $level was forced
+ # to be positive above.
+ else {
+ warning(
+"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
+ );
+ report_definite_bug();
+ last;
}
}
}
- # now we have to make a distinction between the comma count and item
- # count, because the item count will be one greater than the comma
- # count if the last item is not terminated with a comma
- my $i_b =
- ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
- ? $i_last_comma + 1
- : $i_last_comma;
- my $i_e =
- ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
- ? $i_closing_paren - 2
- : $i_closing_paren - 1;
- my $i_effective_last_comma = $i_last_comma;
-
- my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
-
- if ( $last_item_length > 0 ) {
+ # handle increasing depth
+ if ( $level > $current_level || $ci_level > $current_ci_level ) {
- # add 2 to length because other lengths include a comma and a blank
- $last_item_length += 2;
- push @item_lengths, $last_item_length;
- push @i_term_begin, $i_b + 1;
- push @i_term_end, $i_e;
- push @i_term_comma, undef;
+ # Compute the standard incremental whitespace. This will be
+ # the minimum incremental whitespace that will be used. This
+ # choice results in a smooth transition between the gnu-style
+ # and the standard style.
+ my $standard_increment =
+ ( $level - $current_level ) *
+ $rOpts_indent_columns +
+ ( $ci_level - $current_ci_level ) *
+ $rOpts_continuation_indentation;
- my $i_odd = $item_count % 2;
+ # Now we have to define how much extra incremental space
+ # ("$available_space") we want. This extra space will be
+ # reduced as necessary when long lines are encountered or when
+ # it becomes clear that we do not have a good list.
+ my $available_space = 0;
+ my $align_paren = 0;
+ my $excess = 0;
- if ( $last_item_length > $max_length[$i_odd] ) {
- $max_length[$i_odd] = $last_item_length;
+ # initialization on empty stack..
+ if ( $max_gnu_stack_index == 0 ) {
+ $space_count = $level * $rOpts_indent_columns;
}
- $item_count++;
- $i_effective_last_comma = $i_e + 1;
-
- if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
- $identifier_count++;
+ # if this is a BLOCK, add the standard increment
+ elsif ($last_nonblank_block_type) {
+ $space_count += $standard_increment;
}
- }
- #---------------------------------------------------------------
- # End of length calculations
- #---------------------------------------------------------------
+ # if last nonblank token was not structural indentation,
+ # just use standard increment
+ elsif ( $last_nonblank_type ne '{' ) {
+ $space_count += $standard_increment;
+ }
- #---------------------------------------------------------------
- # Compound List Rule 1:
- # Break at (almost) every comma for a list containing a broken
- # sublist. This has higher priority than the Interrupted List
- # Rule.
- #---------------------------------------------------------------
- if ($has_broken_sublist) {
+ # otherwise use the space to the first non-blank level change token
+ else {
- # Break at every comma except for a comma between two
- # simple, small terms. This prevents long vertical
- # columns of, say, just 0's.
- my $small_length = 10; # 2 + actual maximum length wanted
+ $space_count = $gnu_position_predictor;
- # We'll insert a break in long runs of small terms to
- # allow alignment in uniform tables.
- my $skipped_count = 0;
- my $columns = table_columns_available($i_first_comma);
- my $fields = int( $columns / $small_length );
- if ( $rOpts_maximum_fields_per_table
- && $fields > $rOpts_maximum_fields_per_table )
- {
- $fields = $rOpts_maximum_fields_per_table;
- }
- my $max_skipped_count = $fields - 1;
+ my $min_gnu_indentation =
+ $gnu_stack[$max_gnu_stack_index]->get_spaces();
- my $is_simple_last_term = 0;
- my $is_simple_next_term = 0;
- foreach my $j ( 0 .. $item_count ) {
- $is_simple_last_term = $is_simple_next_term;
- $is_simple_next_term = 0;
- if ( $j < $item_count
- && $i_term_end[$j] == $i_term_begin[$j]
- && $item_lengths[$j] <= $small_length )
- {
- $is_simple_next_term = 1;
+ $available_space = $space_count - $min_gnu_indentation;
+ if ( $available_space >= $standard_increment ) {
+ $min_gnu_indentation += $standard_increment;
}
- next if $j == 0;
- if ( $is_simple_last_term
- && $is_simple_next_term
- && $skipped_count < $max_skipped_count )
- {
- $skipped_count++;
+ elsif ( $available_space > 1 ) {
+ $min_gnu_indentation += $available_space + 1;
+ }
+ elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
+ if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
+ $min_gnu_indentation += 2;
+ }
+ else {
+ $min_gnu_indentation += 1;
+ }
}
else {
- $skipped_count = 0;
- my $i = $i_term_comma[ $j - 1 ];
- last unless defined $i;
- $self->set_forced_breakpoint($i);
+ $min_gnu_indentation += $standard_increment;
+ }
+ $available_space = $space_count - $min_gnu_indentation;
+
+ if ( $available_space < 0 ) {
+ $space_count = $min_gnu_indentation;
+ $available_space = 0;
}
+ $align_paren = 1;
}
- # always break at the last comma if this list is
- # interrupted; we wouldn't want to leave a terminal '{', for
- # example.
- if ($interrupted) {
- $self->set_forced_breakpoint($i_true_last_comma);
+ # update state, but not on a blank token
+ if ( $types_to_go[$max_index_to_go] ne 'b' ) {
+
+ $gnu_stack[$max_gnu_stack_index]->set_have_child(1);
+
+ ++$max_gnu_stack_index;
+ $gnu_stack[$max_gnu_stack_index] =
+ new_lp_indentation_item( $space_count, $level, $ci_level,
+ $available_space, $align_paren );
+
+ # If the opening paren is beyond the half-line length, then
+ # we will use the minimum (standard) indentation. This will
+ # help avoid problems associated with running out of space
+ # near the end of a line. As a result, in deeply nested
+ # lists, there will be some indentations which are limited
+ # to this minimum standard indentation. But the most deeply
+ # nested container will still probably be able to shift its
+ # parameters to the right for proper alignment, so in most
+ # cases this will not be noticeable.
+ if ( $available_space > 0 && $space_count > $halfway ) {
+ $gnu_stack[$max_gnu_stack_index]
+ ->tentatively_decrease_available_spaces($available_space);
+ }
}
- return;
}
-#my ( $a, $b, $c ) = caller();
-#print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
-#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
-#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
+ # Count commas and look for non-list characters. Once we see a
+ # non-list character, we give up and don't look for any more commas.
+ if ( $type eq '=>' ) {
+ $gnu_arrow_count{$total_depth}++;
- #---------------------------------------------------------------
- # Interrupted List Rule:
- # A list is forced to use old breakpoints if it was interrupted
- # by side comments or blank lines, or requested by user.
- #---------------------------------------------------------------
- if ( $rOpts_break_at_old_comma_breakpoints
- || $interrupted
- || $i_opening_paren < 0 )
- {
- $self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
- return;
+ # tentatively treating '=>' like '=' for estimating breaks
+ # TODO: this could use some experimentation
+ $last_gnu_equals{$total_depth} = $max_index_to_go;
}
- #---------------------------------------------------------------
- # Looks like a list of items. We have to look at it and size it up.
- #---------------------------------------------------------------
+ elsif ( $type eq ',' ) {
+ $gnu_comma_count{$total_depth}++;
+ }
- my $opening_token = $tokens_to_go[$i_opening_paren];
- my $opening_environment =
- $container_environment_to_go[$i_opening_paren];
+ elsif ( $is_assignment{$type} ) {
+ $last_gnu_equals{$total_depth} = $max_index_to_go;
+ }
- #-------------------------------------------------------------------
- # Return if this will fit on one line
- #-------------------------------------------------------------------
+ # this token might start a new line
+ # if this is a non-blank..
+ if ( $type ne 'b' ) {
- my $i_opening_minus = $self->find_token_starting_list($i_opening_paren);
- return
- unless $self->excess_line_length( $i_opening_minus, $i_closing_paren )
- > 0;
+ # and if ..
+ if (
- #-------------------------------------------------------------------
- # Now we know that this block spans multiple lines; we have to set
- # at least one breakpoint -- real or fake -- as a signal to break
- # open any outer containers.
- #-------------------------------------------------------------------
- set_fake_breakpoint();
+ # this is the first nonblank token of the line
+ $max_index_to_go == 1 && $types_to_go[0] eq 'b'
- # be sure we do not extend beyond the current list length
- if ( $i_effective_last_comma >= $max_index_to_go ) {
- $i_effective_last_comma = $max_index_to_go - 1;
- }
+ # or previous character was one of these:
+ || $last_nonblank_type_in_batch =~ /^([\:\?\,f])$/
- # Set a flag indicating if we need to break open to keep -lp
- # items aligned. This is necessary if any of the list terms
- # exceeds the available space after the '('.
- my $need_lp_break_open = $must_break_open;
- if ( $rOpts_line_up_parentheses && !$must_break_open ) {
- my $columns_if_unbroken =
- maximum_line_length($i_opening_minus) -
- total_line_length( $i_opening_minus, $i_opening_paren );
- $need_lp_break_open =
- ( $max_length[0] > $columns_if_unbroken )
- || ( $max_length[1] > $columns_if_unbroken )
- || ( $first_term_length > $columns_if_unbroken );
- }
+ # or previous character was opening and this does not close it
+ || ( $last_nonblank_type_in_batch eq '{' && $type ne '}' )
+ || ( $last_nonblank_type_in_batch eq '(' and $type ne ')' )
- # Specify if the list must have an even number of fields or not.
- # It is generally safest to assume an even number, because the
- # list items might be a hash list. But if we can be sure that
- # it is not a hash, then we can allow an odd number for more
- # flexibility.
- my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
+ # or this token is one of these:
+ || $type =~ /^([\.]|\|\||\&\&)$/
+
+ # or this is a closing structure
+ || ( $last_nonblank_type_in_batch eq '}'
+ && $last_nonblank_token_in_batch eq
+ $last_nonblank_type_in_batch )
+
+ # or previous token was keyword 'return'
+ || (
+ $last_nonblank_type_in_batch eq 'k'
+ && ( $last_nonblank_token_in_batch eq 'return'
+ && $type ne '{' )
+ )
- if ( $identifier_count >= $item_count - 1
- || $is_assignment{$next_nonblank_type}
- || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
- )
- {
- $odd_or_even = 1;
- }
+ # or starting a new line at certain keywords is fine
+ || ( $type eq 'k'
+ && $is_if_unless_and_or_last_next_redo_return{$token} )
- # do we have a long first term which should be
- # left on a line by itself?
- my $use_separate_first_term = (
- $odd_or_even == 1 # only if we can use 1 field/line
- && $item_count > 3 # need several items
- && $first_term_length >
- 2 * $max_length[0] - 2 # need long first term
- && $first_term_length >
- 2 * $max_length[1] - 2 # need long first term
- );
+ # or this is after an assignment after a closing structure
+ || (
+ $is_assignment{$last_nonblank_type_in_batch}
+ && (
+ $last_last_nonblank_type_in_batch =~ /^[\}\)\]]$/
- # or do we know from the type of list that the first term should
- # be placed alone?
- if ( !$use_separate_first_term ) {
- if ( $is_keyword_with_special_leading_term{$list_type} ) {
- $use_separate_first_term = 1;
+ # and it is significantly to the right
+ || $gnu_position_predictor > $halfway
+ )
+ )
+ )
+ {
+ check_for_long_gnu_style_lines($max_index_to_go);
+ $line_start_index_to_go = $max_index_to_go;
- # should the container be broken open?
- if ( $item_count < 3 ) {
- if ( $i_first_comma - $i_opening_paren < 4 ) {
- ${$rdo_not_break_apart} = 1;
+ # back up 1 token if we want to break before that type
+ # otherwise, we may strand tokens like '?' or ':' on a line
+ if ( $line_start_index_to_go > 0 ) {
+ if ( $last_nonblank_type_in_batch eq 'k' ) {
+
+ if ( $want_break_before{$last_nonblank_token_in_batch} )
+ {
+ $line_start_index_to_go--;
+ }
}
- }
- elsif ($first_term_length < 20
- && $i_first_comma - $i_opening_paren < 4 )
- {
- my $columns = table_columns_available($i_first_comma);
- if ( $first_term_length < $columns ) {
- ${$rdo_not_break_apart} = 1;
+ elsif ( $want_break_before{$last_nonblank_type_in_batch} ) {
+ $line_start_index_to_go--;
}
}
}
}
- # if so,
- if ($use_separate_first_term) {
-
- # ..set a break and update starting values
- $use_separate_first_term = 1;
- $self->set_forced_breakpoint($i_first_comma);
- $i_opening_paren = $i_first_comma;
- $i_first_comma = $rcomma_index->[1];
- $item_count--;
- return if $comma_count == 1;
- shift @item_lengths;
- shift @i_term_begin;
- shift @i_term_end;
- shift @i_term_comma;
+ # remember the predicted position of this token on the output line
+ if ( $max_index_to_go > $line_start_index_to_go ) {
+ $gnu_position_predictor =
+ total_line_length( $line_start_index_to_go, $max_index_to_go );
}
-
- # if not, update the metrics to include the first term
else {
- if ( $first_term_length > $max_length[0] ) {
- $max_length[0] = $first_term_length;
- }
+ $gnu_position_predictor =
+ $space_count + $token_lengths_to_go[$max_index_to_go];
}
- # Field width parameters
- my $pair_width = ( $max_length[0] + $max_length[1] );
- my $max_width =
- ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
+ # store the indentation object for this token
+ # this allows us to manipulate the leading whitespace
+ # (in case we have to reduce indentation to fit a line) without
+ # having to change any token values
+ $leading_spaces_to_go[$max_index_to_go] =
+ $gnu_stack[$max_gnu_stack_index];
+ $reduced_spaces_to_go[$max_index_to_go] =
+ ( $max_gnu_stack_index > 0 && $ci_level )
+ ? $gnu_stack[ $max_gnu_stack_index - 1 ]
+ : $gnu_stack[$max_gnu_stack_index];
+ return;
+ }
- # Number of free columns across the page width for laying out tables
- my $columns = table_columns_available($i_first_comma);
+ sub check_for_long_gnu_style_lines {
- # Estimated maximum number of fields which fit this space
- # This will be our first guess
- my $number_of_fields_max =
- maximum_number_of_fields( $columns, $odd_or_even, $max_width,
- $pair_width );
- my $number_of_fields = $number_of_fields_max;
+ # look at the current estimated maximum line length, and
+ # remove some whitespace if it exceeds the desired maximum
+ my ($mx_index_to_go) = @_;
- # Find the best-looking number of fields
- # and make this our second guess if possible
- my ( $number_of_fields_best, $ri_ragged_break_list,
- $new_identifier_count )
- = $self->study_list_complexity( \@i_term_begin, \@i_term_end,
- \@item_lengths, $max_width );
+ # this is only for the '-lp' style
+ return unless ($rOpts_line_up_parentheses);
- if ( $number_of_fields_best != 0
- && $number_of_fields_best < $number_of_fields_max )
- {
- $number_of_fields = $number_of_fields_best;
- }
+ # nothing can be done if no stack items defined for this line
+ return if ( $max_gnu_item_index == UNDEFINED_INDEX );
- # ----------------------------------------------------------------------
- # If we are crowded and the -lp option is being used, try to
- # undo some indentation
- # ----------------------------------------------------------------------
- if (
- $rOpts_line_up_parentheses
- && (
- $number_of_fields == 0
- || ( $number_of_fields == 1
- && $number_of_fields != $number_of_fields_best )
- )
- )
- {
- my $available_spaces =
- $self->get_available_spaces_to_go($i_first_comma);
- if ( $available_spaces > 0 ) {
+ # see if we have exceeded the maximum desired line length
+ # keep 2 extra free because they are needed in some cases
+ # (result of trial-and-error testing)
+ my $spaces_needed =
+ $gnu_position_predictor - maximum_line_length($mx_index_to_go) + 2;
- my $spaces_wanted = $max_width - $columns; # for 1 field
+ return if ( $spaces_needed <= 0 );
- if ( $number_of_fields_best == 0 ) {
- $number_of_fields_best =
- get_maximum_fields_wanted( \@item_lengths );
- }
+ # We are over the limit, so try to remove a requested number of
+ # spaces from leading whitespace. We are only allowed to remove
+ # from whitespace items created on this batch, since others have
+ # already been used and cannot be undone.
+ my @candidates = ();
+ my $i;
- if ( $number_of_fields_best != 1 ) {
- my $spaces_wanted_2 =
- 1 + $pair_width - $columns; # for 2 fields
- if ( $available_spaces > $spaces_wanted_2 ) {
- $spaces_wanted = $spaces_wanted_2;
- }
- }
+ # loop over all whitespace items created for the current batch
+ for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
+ my $item = $gnu_item_list[$i];
- if ( $spaces_wanted > 0 ) {
- my $deleted_spaces =
- $self->reduce_lp_indentation( $i_first_comma,
- $spaces_wanted );
+ # item must still be open to be a candidate (otherwise it
+ # cannot influence the current token)
+ next if ( $item->get_closed() >= 0 );
- # redo the math
- if ( $deleted_spaces > 0 ) {
- $columns = table_columns_available($i_first_comma);
- $number_of_fields_max =
- maximum_number_of_fields( $columns, $odd_or_even,
- $max_width, $pair_width );
- $number_of_fields = $number_of_fields_max;
+ my $available_spaces = $item->get_available_spaces();
- if ( $number_of_fields_best == 1
- && $number_of_fields >= 1 )
- {
- $number_of_fields = $number_of_fields_best;
- }
- }
- }
+ if ( $available_spaces > 0 ) {
+ push( @candidates, [ $i, $available_spaces ] );
}
}
- # try for one column if two won't work
- if ( $number_of_fields <= 0 ) {
- $number_of_fields = int( $columns / $max_width );
- }
+ return unless (@candidates);
- # The user can place an upper bound on the number of fields,
- # which can be useful for doing maintenance on tables
- if ( $rOpts_maximum_fields_per_table
- && $number_of_fields > $rOpts_maximum_fields_per_table )
- {
- $number_of_fields = $rOpts_maximum_fields_per_table;
+ # sort by available whitespace so that we can remove whitespace
+ # from the maximum available first
+ @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
+
+ # keep removing whitespace until we are done or have no more
+ foreach my $candidate (@candidates) {
+ my ( $i, $available_spaces ) = @{$candidate};
+ my $deleted_spaces =
+ ( $available_spaces > $spaces_needed )
+ ? $spaces_needed
+ : $available_spaces;
+
+ # remove the incremental space from this item
+ $gnu_item_list[$i]->decrease_available_spaces($deleted_spaces);
+
+ my $i_debug = $i;
+
+ # update the leading whitespace of this item and all items
+ # that came after it
+ for ( ; $i <= $max_gnu_item_index ; $i++ ) {
+
+ my $old_spaces = $gnu_item_list[$i]->get_spaces();
+ if ( $old_spaces >= $deleted_spaces ) {
+ $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
+ }
+
+ # shouldn't happen except for code bug:
+ else {
+ my $level = $gnu_item_list[$i_debug]->get_level();
+ my $ci_level = $gnu_item_list[$i_debug]->get_ci_level();
+ my $old_level = $gnu_item_list[$i]->get_level();
+ my $old_ci_level = $gnu_item_list[$i]->get_ci_level();
+ warning(
+"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
+ );
+ report_definite_bug();
+ }
+ }
+ $gnu_position_predictor -= $deleted_spaces;
+ $spaces_needed -= $deleted_spaces;
+ last unless ( $spaces_needed > 0 );
}
+ return;
+ }
- # How many columns (characters) and lines would this container take
- # if no additional whitespace were added?
- my $packed_columns = token_sequence_length( $i_opening_paren + 1,
- $i_effective_last_comma + 1 );
- if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
- my $packed_lines = 1 + int( $packed_columns / $columns );
+ sub finish_lp_batch {
- # are we an item contained in an outer list?
- my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
+ # This routine is called once after each output stream batch is
+ # finished to undo indentation for all incomplete -lp
+ # indentation levels. It is too risky to leave a level open,
+ # because then we can't backtrack in case of a long line to follow.
+ # This means that comments and blank lines will disrupt this
+ # indentation style. But the vertical aligner may be able to
+ # get the space back if there are side comments.
- if ( $number_of_fields <= 0 ) {
+ # this is only for the 'lp' style
+ return unless ($rOpts_line_up_parentheses);
-# #---------------------------------------------------------------
-# # We're in trouble. We can't find a single field width that works.
-# # There is no simple answer here; we may have a single long list
-# # item, or many.
-# #---------------------------------------------------------------
-#
-# In many cases, it may be best to not force a break if there is just one
-# comma, because the standard continuation break logic will do a better
-# job without it.
-#
-# In the common case that all but one of the terms can fit
-# on a single line, it may look better not to break open the
-# containing parens. Consider, for example
-#
-# $color =
-# join ( '/',
-# sort { $color_value{$::a} <=> $color_value{$::b}; }
-# keys %colors );
-#
-# which will look like this with the container broken:
-#
-# $color = join (
-# '/',
-# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
-# );
-#
-# Here is an example of this rule for a long last term:
-#
-# log_message( 0, 256, 128,
-# "Number of routes in adj-RIB-in to be considered: $peercount" );
-#
-# And here is an example with a long first term:
-#
-# $s = sprintf(
-# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
-# $r, $pu, $ps, $cu, $cs, $tt
-# )
-# if $style eq 'all';
+ # nothing can be done if no stack items defined for this line
+ return if ( $max_gnu_item_index == UNDEFINED_INDEX );
- my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
- my $long_last_term =
- $self->excess_line_length( 0, $i_last_comma ) <= 0;
- my $long_first_term =
- $self->excess_line_length( $i_first_comma + 1, $max_index_to_go )
- <= 0;
+ # loop over all whitespace items created for the current batch
+ foreach my $i ( 0 .. $max_gnu_item_index ) {
+ my $item = $gnu_item_list[$i];
- # break at every comma ...
- if (
+ # only look for open items
+ next if ( $item->get_closed() >= 0 );
- # if requested by user or is best looking
- $number_of_fields_best == 1
+ # Tentatively remove all of the available space
+ # (The vertical aligner will try to get it back later)
+ my $available_spaces = $item->get_available_spaces();
+ if ( $available_spaces > 0 ) {
- # or if this is a sublist of a larger list
- || $in_hierarchical_list
+ # delete incremental space for this item
+ $gnu_item_list[$i]
+ ->tentatively_decrease_available_spaces($available_spaces);
- # or if multiple commas and we don't have a long first or last
- # term
- || ( $comma_count > 1
- && !( $long_last_term || $long_first_term ) )
- )
- {
- foreach ( 0 .. $comma_count - 1 ) {
- $self->set_forced_breakpoint( $rcomma_index->[$_] );
+ # Reduce the total indentation space of any nodes that follow
+ # Note that any such nodes must necessarily be dependents
+ # of this node.
+ foreach ( $i + 1 .. $max_gnu_item_index ) {
+ $gnu_item_list[$_]->decrease_SPACES($available_spaces);
}
}
- elsif ($long_last_term) {
+ }
+ return;
+ }
+} ## end closure set_leading_whitespace
- $self->set_forced_breakpoint($i_last_comma);
- ${$rdo_not_break_apart} = 1 unless $must_break_open;
- }
- elsif ($long_first_term) {
+sub reduce_lp_indentation {
- $self->set_forced_breakpoint($i_first_comma);
- }
- else {
+ # reduce the leading whitespace at token $i if possible by $spaces_needed
+ # (a large value of $spaces_needed will remove all excess space)
+ # NOTE: to be called from scan_list only for a sequence of tokens
+ # contained between opening and closing parens/braces/brackets
- # let breaks be defined by default bond strength logic
- }
- return;
- }
+ my ( $self, $i, $spaces_wanted ) = @_;
+ my $deleted_spaces = 0;
- # --------------------------------------------------------
- # We have a tentative field count that seems to work.
- # How many lines will this require?
- # --------------------------------------------------------
- my $formatted_lines = $item_count / ($number_of_fields);
- if ( $formatted_lines != int $formatted_lines ) {
- $formatted_lines = 1 + int $formatted_lines;
- }
+ my $item = $leading_spaces_to_go[$i];
+ my $available_spaces = $item->get_available_spaces();
- # So far we've been trying to fill out to the right margin. But
- # compact tables are easier to read, so let's see if we can use fewer
- # fields without increasing the number of lines.
- $number_of_fields =
- compactify_table( $item_count, $number_of_fields, $formatted_lines,
- $odd_or_even );
+ if (
+ $available_spaces > 0
+ && ( ( $spaces_wanted <= $available_spaces )
+ || !$item->get_have_child() )
+ )
+ {
- # How many spaces across the page will we fill?
- my $columns_per_line =
- ( int $number_of_fields / 2 ) * $pair_width +
- ( $number_of_fields % 2 ) * $max_width;
+ # we'll remove these spaces, but mark them as recoverable
+ $deleted_spaces =
+ $item->tentatively_decrease_available_spaces($spaces_wanted);
+ }
- my $formatted_columns;
+ return $deleted_spaces;
+}
- if ( $number_of_fields > 1 ) {
- $formatted_columns =
- ( $pair_width * ( int( $item_count / 2 ) ) +
- ( $item_count % 2 ) * $max_width );
- }
- else {
- $formatted_columns = $max_width * $item_count;
- }
- if ( $formatted_columns < $packed_columns ) {
- $formatted_columns = $packed_columns;
- }
+###########################################################
+# CODE SECTION 13: Preparing batches for vertical alignment
+###########################################################
- my $unused_columns = $formatted_columns - $packed_columns;
+sub send_lines_to_vertical_aligner {
- # set some empirical parameters to help decide if we should try to
- # align; high sparsity does not look good, especially with few lines
- my $sparsity = ($unused_columns) / ($formatted_columns);
- my $max_allowed_sparsity =
- ( $item_count < 3 ) ? 0.1
- : ( $packed_lines == 1 ) ? 0.15
- : ( $packed_lines == 2 ) ? 0.4
- : 0.7;
+ my ($self) = @_;
- # Begin check for shortcut methods, which avoid treating a list
- # as a table for relatively small parenthesized lists. These
- # are usually easier to read if not formatted as tables.
- if (
- $packed_lines <= 2 # probably can fit in 2 lines
- && $item_count < 9 # doesn't have too many items
- && $opening_environment eq 'BLOCK' # not a sub-container
- && $opening_token eq '(' # is paren list
- )
- {
+ # This routine receives a batch of code for which the final line breaks
+ # have been defined. Here we prepare the lines for passing to the vertical
+ # aligner. We do the following tasks:
+ # - mark certain vertical alignment tokens, such as '=', in each line
+ # - make minor indentation adjustments
+ # - do logical padding: insert extra blank spaces to help display certain
+ # logical constructions
- # Shortcut method 1: for -lp and just one comma:
- # This is a no-brainer, just break at the comma.
- if (
- $rOpts_line_up_parentheses # -lp
- && $item_count == 2 # two items, one comma
- && !$must_break_open
- )
- {
- my $i_break = $rcomma_index->[0];
- $self->set_forced_breakpoint($i_break);
- ${$rdo_not_break_apart} = 1;
- return;
+ my $this_batch = $self->[_this_batch_];
+ my $rlines_K = $this_batch->[_rlines_K_];
+ if ( !@{$rlines_K} ) {
+ Fault("Unexpected call with no lines");
+ return;
+ }
+ my $n_last_line = @{$rlines_K} - 1;
- }
+ my $do_not_pad = $this_batch->[_do_not_pad_];
+ my $peak_batch_size = $this_batch->[_peak_batch_size_];
+ my $starting_in_quote = $this_batch->[_starting_in_quote_];
+ my $ending_in_quote = $this_batch->[_ending_in_quote_];
+ my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
+ my $ibeg0 = $this_batch->[_ibeg0_];
+ my $rK_to_go = $this_batch->[_rK_to_go_];
+ my $batch_count = $this_batch->[_batch_count_];
- # method 2 is for most small ragged lists which might look
- # best if not displayed as a table.
- if (
- ( $number_of_fields == 2 && $item_count == 3 )
- || (
- $new_identifier_count > 0 # isn't all quotes
- && $sparsity > 0.15
- ) # would be fairly spaced gaps if aligned
- )
- {
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
- my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
- $ri_ragged_break_list );
- ++$break_count if ($use_separate_first_term);
+ my ( $Kbeg_next, $Kend_next ) = @{ $rlines_K->[0] };
+ my $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_];
+ my $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
+ my $type_end_next = $rLL->[$Kend_next]->[_TYPE_];
+
+ # Construct indexes to the global_to_go arrays so that called routines can
+ # still access those arrays. This might eventually be removed
+ # when all called routines have been converted to access token values
+ # in the rLL array instead.
+ my $Kbeg0 = $Kbeg_next;
+ my ( $ri_first, $ri_last );
+ foreach my $rline ( @{$rlines_K} ) {
+ my ( $Kbeg, $Kend ) = @{$rline};
+ my $ibeg = $ibeg0 + $Kbeg - $Kbeg0;
+ my $iend = $ibeg0 + $Kend - $Kbeg0;
+ push @{$ri_first}, $ibeg;
+ push @{$ri_last}, $iend;
+ }
+
+ my ( $cscw_block_comment, $closing_side_comment );
+ if ( $rOpts->{'closing-side-comments'} ) {
+ ( $closing_side_comment, $cscw_block_comment ) =
+ $self->add_closing_side_comment();
+ }
+
+ my $rindentation_list = [0]; # ref to indentations for each line
+
+ # define the array @{$ralignment_type_to_go} for the output tokens
+ # which will be non-blank for each special token (such as =>)
+ # for which alignment is required.
+ my $ralignment_type_to_go =
+ $self->set_vertical_alignment_markers( $ri_first, $ri_last );
- # NOTE: we should really use the true break count here,
- # which can be greater if there are large terms and
- # little space, but usually this will work well enough.
- unless ($must_break_open) {
+ # flush before a long if statement to avoid unwanted alignment
+ if ( $n_last_line > 0
+ && $type_beg_next eq 'k'
+ && $token_beg_next =~ /^(if|unless)$/ )
+ {
+ $self->flush_vertical_aligner();
+ }
- if ( $break_count <= 1 ) {
- ${$rdo_not_break_apart} = 1;
- }
- elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
- {
- ${$rdo_not_break_apart} = 1;
- }
- }
- return;
- }
+ $self->undo_ci( $ri_first, $ri_last );
- } # end shortcut methods
+ $self->set_logical_padding( $ri_first, $ri_last, $peak_batch_size,
+ $starting_in_quote )
+ if ( $rOpts->{'logical-padding'} );
- # debug stuff
- $DEBUG_SPARSE && do {
- print STDOUT
-"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
+ # loop to prepare each line for shipment
+ my $in_comma_list;
+ my ( $Kbeg, $type_beg, $token_beg );
+ my ( $Kend, $type_end );
+ for my $n ( 0 .. $n_last_line ) {
- };
+ my $ibeg = $ri_first->[$n];
+ my $iend = $ri_last->[$n];
+ my $rline = $rlines_K->[$n];
+ my $forced_breakpoint = $rline->[2];
- #---------------------------------------------------------------
- # Compound List Rule 2:
- # If this list is too long for one line, and it is an item of a
- # larger list, then we must format it, regardless of sparsity
- # (ian.t). One reason that we have to do this is to trigger
- # Compound List Rule 1, above, which causes breaks at all commas of
- # all outer lists. In this way, the structure will be properly
- # displayed.
- #---------------------------------------------------------------
+ # we may need to look at variables on three consecutive lines ...
- # Decide if this list is too long for one line unless broken
- my $total_columns = table_columns_available($i_opening_paren);
- my $too_long = $packed_columns > $total_columns;
+ # Some vars on line [n-1], if any:
+ my $Kbeg_last = $Kbeg;
+ my $type_beg_last = $type_beg;
+ my $token_beg_last = $token_beg;
+ my $Kend_last = $Kend;
+ my $type_end_last = $type_end;
- # For a paren list, include the length of the token just before the
- # '(' because this is likely a sub call, and we would have to
- # include the sub name on the same line as the list. This is still
- # imprecise, but not too bad. (steve.t)
- if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
+ # Some vars on line [n]:
+ $Kbeg = $Kbeg_next;
+ $type_beg = $type_beg_next;
+ $token_beg = $token_beg_next;
+ $Kend = $Kend_next;
+ $type_end = $type_end_next;
- $too_long = $self->excess_line_length( $i_opening_minus,
- $i_effective_last_comma + 1 ) > 0;
- }
+ # We use two slightly different definitions of level jump at the end
+ # of line:
+ # $ljump is the level jump needed by 'sub set_adjusted_indentation'
+ # $level_jump is the level jump needed by the vertical aligner.
+ my $ljump = 0; # level jump at end of line
- # FIXME: For an item after a '=>', try to include the length of the
- # thing before the '=>'. This is crude and should be improved by
- # actually looking back token by token.
- if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
- my $i_opening_minus = $i_opening_paren - 4;
- if ( $i_opening_minus >= 0 ) {
- $too_long = $self->excess_line_length( $i_opening_minus,
- $i_effective_last_comma + 1 ) > 0;
- }
+ # Get some vars on line [n+1], if any:
+ if ( $n < $n_last_line ) {
+ ( $Kbeg_next, $Kend_next ) =
+ @{ $rlines_K->[ $n + 1 ] };
+ $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_];
+ $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
+ $type_end_next = $rLL->[$Kend_next]->[_TYPE_];
+ $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
}
- # Always break lists contained in '[' and '{' if too long for 1 line,
- # and always break lists which are too long and part of a more complex
- # structure.
- my $must_break_open_container = $must_break_open
- || ( $too_long
- && ( $in_hierarchical_list || $opening_token ne '(' ) );
+ # level jump at end of line for the vertical aligner:
+ my $level_jump =
+ $Kend >= $Klimit
+ ? 0
+ : $rLL->[ $Kend + 1 ]->[_SLEVEL_] - $rLL->[$Kbeg]->[_SLEVEL_];
-#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
+ $self->delete_needless_alignments( $ibeg, $iend,
+ $ralignment_type_to_go );
- #---------------------------------------------------------------
- # The main decision:
- # Now decide if we will align the data into aligned columns. Do not
- # attempt to align columns if this is a tiny table or it would be
- # too spaced. It seems that the more packed lines we have, the
- # sparser the list that can be allowed and still look ok.
- #---------------------------------------------------------------
+ my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
+ $self->make_alignment_patterns( $ibeg, $iend,
+ $ralignment_type_to_go );
- if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
- || ( $formatted_lines < 2 )
- || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
- )
- {
+ my ( $indentation, $lev, $level_end, $terminal_type,
+ $terminal_block_type, $is_semicolon_terminated, $is_outdented_line )
+ = $self->set_adjusted_indentation( $ibeg, $iend, $rfields,
+ $rpatterns, $ri_first, $ri_last,
+ $rindentation_list, $ljump, $starting_in_quote,
+ $is_static_block_comment, );
- #---------------------------------------------------------------
- # too sparse: would look ugly if aligned in a table;
- #---------------------------------------------------------------
+ # we will allow outdenting of long lines..
+ my $outdent_long_lines = (
- # use old breakpoints if this is a 'big' list
- # FIXME: See if this is still necessary. sub sweep_left_to_right
- # now fixes a lot of problems.
- if ( $packed_lines > 2 && $item_count > 10 ) {
- write_logfile_entry("List sparse: using old breakpoints\n");
- $self->copy_old_breakpoints( $i_first_comma, $i_last_comma );
- }
+ # which are long quotes, if allowed
+ ( $type_beg eq 'Q' && $rOpts->{'outdent-long-quotes'} )
- # let the continuation logic handle it if 2 lines
- else {
+ # which are long block comments, if allowed
+ || (
+ $type_beg eq '#'
+ && $rOpts->{'outdent-long-comments'}
- my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
- $ri_ragged_break_list );
- ++$break_count if ($use_separate_first_term);
+ # but not if this is a static block comment
+ && !$is_static_block_comment
+ )
+ );
- unless ($must_break_open_container) {
- if ( $break_count <= 1 ) {
- ${$rdo_not_break_apart} = 1;
- }
- elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
- {
- ${$rdo_not_break_apart} = 1;
- }
- }
- }
- return;
- }
+ my $rvertical_tightness_flags =
+ $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
+ $ri_first, $ri_last, $ending_in_quote, $closing_side_comment );
- #---------------------------------------------------------------
- # go ahead and format as a table
- #---------------------------------------------------------------
- write_logfile_entry(
- "List: auto formatting with $number_of_fields fields/row\n");
+ # flush an outdented line to avoid any unwanted vertical alignment
+ $self->flush_vertical_aligner() if ($is_outdented_line);
- my $j_first_break =
- $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
+ # Set a flag at the final ':' of a ternary chain to request
+ # vertical alignment of the final term. Here is a
+ # slightly complex example:
+ #
+ # $self->{_text} = (
+ # !$section ? ''
+ # : $type eq 'item' ? "the $section entry"
+ # : "the section on $section"
+ # )
+ # . (
+ # $page
+ # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
+ # : ' elsewhere in this document'
+ # );
+ #
+ my $is_terminal_ternary = 0;
- for (
- my $j = $j_first_break ;
- $j < $comma_count ;
- $j += $number_of_fields
- )
- {
- my $i = $rcomma_index->[$j];
- $self->set_forced_breakpoint($i);
- }
- return;
- }
-} ## end closure set_comma_breakpoints_do
+ if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) {
+ my $last_leading_type = $n > 0 ? $type_beg_last : ':';
+ if ( $terminal_type ne ';'
+ && $n_last_line > $n
+ && $level_end == $lev )
+ {
+ $level_end = $rLL->[$Kbeg_next]->[_LEVEL_];
+ $terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
+ }
+ if (
+ $last_leading_type eq ':'
+ && ( ( $terminal_type eq ';' && $level_end <= $lev )
+ || ( $terminal_type ne ':' && $level_end < $lev ) )
+ )
+ {
-sub study_list_complexity {
+ # the terminal term must not contain any ternary terms, as in
+ # my $ECHO = (
+ # $Is_MSWin32 ? ".\\echo$$"
+ # : $Is_MacOS ? ":echo$$"
+ # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
+ # );
+ $is_terminal_ternary = 1;
- # Look for complex tables which should be formatted with one term per line.
- # Returns the following:
- #
- # \@i_ragged_break_list = list of good breakpoints to avoid lines
- # which are hard to read
- # $number_of_fields_best = suggested number of fields based on
- # complexity; = 0 if any number may be used.
- #
- my ( $self, $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
- my $item_count = @{$ri_term_begin};
- my $complex_item_count = 0;
- my $number_of_fields_best = $rOpts->{'maximum-fields-per-table'};
- my $i_max = @{$ritem_lengths} - 1;
- ##my @item_complexity;
+ my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_];
+ while ( defined($KP) && $KP <= $Kend ) {
+ my $type_KP = $rLL->[$KP]->[_TYPE_];
+ if ( $type_KP eq '?' || $type_KP eq ':' ) {
+ $is_terminal_ternary = 0;
+ last;
+ }
+ $KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_];
+ }
+ }
+ }
- my $i_last_last_break = -3;
- my $i_last_break = -2;
- my @i_ragged_break_list;
+ my $level_adj = $lev;
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
+ $level_adj = $radjusted_levels->[$Kbeg];
+ if ( $level_adj < 0 ) { $level_adj = 0 }
+ }
- my $definitely_complex = 30;
- my $definitely_simple = 12;
- my $quote_count = 0;
+ # add any new closing side comment to the last line
+ if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
+ $rfields->[-1] .= " $closing_side_comment";
- for my $i ( 0 .. $i_max ) {
- my $ib = $ri_term_begin->[$i];
- my $ie = $ri_term_end->[$i];
+ # NOTE: Patch for csc. We can just use 1 for the length of the csc
+ # because its length should not be a limiting factor from here on.
+ $rfield_lengths->[-1] += 2;
+ }
- # define complexity: start with the actual term length
- my $weighted_length = ( $ritem_lengths->[$i] - 2 );
+ # send this new line down the pipe
+ my $rvalign_hash = {};
+ $rvalign_hash->{level} = $lev;
+ $rvalign_hash->{level_end} = $level_end;
+ $rvalign_hash->{level_adj} = $level_adj;
+ $rvalign_hash->{indentation} = $indentation;
+ $rvalign_hash->{is_forced_break} = $forced_breakpoint || $in_comma_list;
+ $rvalign_hash->{outdent_long_lines} = $outdent_long_lines;
+ $rvalign_hash->{is_terminal_ternary} = $is_terminal_ternary;
+ $rvalign_hash->{is_terminal_statement} = $is_semicolon_terminated;
+ $rvalign_hash->{do_not_pad} = $do_not_pad;
+ $rvalign_hash->{rvertical_tightness_flags} = $rvertical_tightness_flags;
+ $rvalign_hash->{level_jump} = $level_jump;
+ $rvalign_hash->{rfields} = $rfields;
+ $rvalign_hash->{rpatterns} = $rpatterns;
+ $rvalign_hash->{rtokens} = $rtokens;
+ $rvalign_hash->{rfield_lengths} = $rfield_lengths;
+ $rvalign_hash->{terminal_block_type} = $terminal_block_type;
+ $rvalign_hash->{batch_count} = $batch_count;
- ##TBD: join types here and check for variations
- ##my $str=join "", @tokens_to_go[$ib..$ie];
+ my $vao = $self->[_vertical_aligner_object_];
+ $vao->valign_input($rvalign_hash);
- my $is_quote = 0;
- if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
- $is_quote = 1;
- $quote_count++;
- }
- elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
- $quote_count++;
- }
+ $in_comma_list = $type_end eq ',' && $forced_breakpoint;
- if ( $ib eq $ie ) {
- if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
- $complex_item_count++;
- $weighted_length *= 2;
- }
- else {
- }
- }
- else {
- if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
- $complex_item_count++;
- $weighted_length *= 2;
- }
- if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
- $weighted_length += 4;
- }
- }
+ # flush an outdented line to avoid any unwanted vertical alignment
+ $self->flush_vertical_aligner() if ($is_outdented_line);
- # add weight for extra tokens.
- $weighted_length += 2 * ( $ie - $ib );
+ $do_not_pad = 0;
-## my $BUB = join '', @tokens_to_go[$ib..$ie];
-## print "# COMPLEXITY:$weighted_length $BUB\n";
+ # Set flag indicating if this line ends in an opening
+ # token and is very short, so that a blank line is not
+ # needed if the subsequent line is a comment.
+ # Examples of what we are looking for:
+ # {
+ # && (
+ # BEGIN {
+ # default {
+ # sub {
+ $self->[_last_output_short_opening_token_]
-##push @item_complexity, $weighted_length;
+ # line ends in opening token
+ = $type_end =~ /^[\{\(\[L]$/
- # now mark a ragged break after this item it if it is 'long and
- # complex':
- if ( $weighted_length >= $definitely_complex ) {
+ # and either
+ && (
+ # line has either single opening token
+ $Kend == $Kbeg
- # if we broke after the previous term
- # then break before it too
- if ( $i_last_break == $i - 1
- && $i > 1
- && $i_last_last_break != $i - 2 )
- {
+ # or is a single token followed by opening token.
+ # Note that sub identifiers have blanks like 'sub doit'
+ || ( $Kend - $Kbeg <= 2 && $token_beg !~ /\s+/ )
+ )
- ## FIXME: don't strand a small term
- pop @i_ragged_break_list;
- push @i_ragged_break_list, $i - 2;
- push @i_ragged_break_list, $i - 1;
- }
+ # and limit total to 10 character widths
+ && token_sequence_length( $ibeg, $iend ) <= 10;
- push @i_ragged_break_list, $i;
- $i_last_last_break = $i_last_break;
- $i_last_break = $i;
- }
+ } # end of loop to output each line
- # don't break before a small last term -- it will
- # not look good on a line by itself.
- elsif ($i == $i_max
- && $i_last_break == $i - 1
- && $weighted_length <= $definitely_simple )
- {
- pop @i_ragged_break_list;
- }
+ # remember indentation of lines containing opening containers for
+ # later use by sub set_adjusted_indentation
+ $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
+
+ # output any new -cscw block comment
+ if ($cscw_block_comment) {
+ $self->flush_vertical_aligner();
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
}
+ return;
+}
- my $identifier_count = $i_max + 1 - $quote_count;
+{ ## begin closure set_vertical_alignment_markers
+ my %is_vertical_alignment_type;
+ my %is_not_vertical_alignment_token;
+ my %is_vertical_alignment_keyword;
+ my %is_terminal_alignment_type;
+ my %is_low_level_alignment_token;
- # Need more tuning here..
- if ( $max_width > 12
- && $complex_item_count > $item_count / 2
- && $number_of_fields_best != 2 )
- {
- $number_of_fields_best = 1;
- }
+ BEGIN {
- return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
-}
+ my @q;
-sub get_maximum_fields_wanted {
+ # Replaced =~ and // in the list. // had been removed in RT 119588
+ @q = qw#
+ = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
+ { ? : => && || ~~ !~~ =~ !~ // <=> ->
+ #;
+ @is_vertical_alignment_type{@q} = (1) x scalar(@q);
- # Not all tables look good with more than one field of items.
- # This routine looks at a table and decides if it should be
- # formatted with just one field or not.
- # This coding is still under development.
- my ($ritem_lengths) = @_;
+ # These 'tokens' are not aligned. We need this to remove [
+ # from the above list because it has type ='{'
+ @q = qw([);
+ @is_not_vertical_alignment_token{@q} = (1) x scalar(@q);
- my $number_of_fields_best = 0;
+ # these are the only types aligned at a line end
+ @q = qw(&& || =>);
+ @is_terminal_alignment_type{@q} = (1) x scalar(@q);
- # For just a few items, we tentatively assume just 1 field.
- my $item_count = @{$ritem_lengths};
- if ( $item_count <= 5 ) {
- $number_of_fields_best = 1;
+ # these tokens only align at line level
+ @q = ( '{', '(' );
+ @is_low_level_alignment_token{@q} = (1) x scalar(@q);
+
+ # eq and ne were removed from this list to improve alignment chances
+ @q = qw(if unless and or err for foreach while until);
+ @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
}
- # For larger tables, look at it both ways and see what looks best
- else {
+ sub set_vertical_alignment_markers {
- my $is_odd = 1;
- my @max_length = ( 0, 0 );
- my @last_length_2 = ( undef, undef );
- my @first_length_2 = ( undef, undef );
- my $last_length = undef;
- my $total_variation_1 = 0;
- my $total_variation_2 = 0;
- my @total_variation_2 = ( 0, 0 );
+ # This routine takes the first step toward vertical alignment of the
+ # lines of output text. It looks for certain tokens which can serve as
+ # vertical alignment markers (such as an '=').
+ #
+ # Method: We look at each token $i in this output batch and set
+ # $ralignment_type_to_go->[$i] equal to those tokens at which we would
+ # accept vertical alignment.
- foreach my $j ( 0 .. $item_count - 1 ) {
+ my ( $self, $ri_first, $ri_last ) = @_;
+ my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
- $is_odd = 1 - $is_odd;
- my $length = $ritem_lengths->[$j];
- if ( $length > $max_length[$is_odd] ) {
- $max_length[$is_odd] = $length;
- }
+ my $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
+ my $ralignment_type_to_go;
- if ( defined($last_length) ) {
- my $dl = abs( $length - $last_length );
- $total_variation_1 += $dl;
- }
- $last_length = $length;
+ # Initialize the alignment array. Note that closing side comments can
+ # insert up to 2 additional tokens beyond the original
+ # $max_index_to_go, so we need to check ri_last for the last index.
+ my $max_line = @{$ri_first} - 1;
+ my $iend = $ri_last->[$max_line];
+ if ( $iend < $max_index_to_go ) { $iend = $max_index_to_go }
+ for my $i ( 0 .. $iend ) {
+ $ralignment_type_to_go->[$i] = '';
+ }
- my $ll = $last_length_2[$is_odd];
- if ( defined($ll) ) {
- my $dl = abs( $length - $ll );
- $total_variation_2[$is_odd] += $dl;
- }
- else {
- $first_length_2[$is_odd] = $length;
- }
- $last_length_2[$is_odd] = $length;
+ # nothing to do if we aren't allowed to change whitespace
+ if ( !$rOpts_add_whitespace ) {
+ return $ralignment_type_to_go;
}
- $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
- my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
- unless ( $total_variation_2 < $factor * $total_variation_1 ) {
- $number_of_fields_best = 1;
+ # remember the index of last nonblank token before any sidecomment
+ my $i_terminal = $max_index_to_go;
+ if ( $types_to_go[$i_terminal] eq '#' ) {
+ if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
+ if ( $i_terminal > 0 ) { --$i_terminal }
+ }
}
- }
- return ($number_of_fields_best);
-}
-sub table_columns_available {
- my $i_first_comma = shift;
- my $columns =
- maximum_line_length($i_first_comma) -
- leading_spaces_to_go($i_first_comma);
+ # look at each line of this batch..
+ my $last_vertical_alignment_before_index;
+ my $vert_last_nonblank_type;
+ my $vert_last_nonblank_token;
+ my $vert_last_nonblank_block_type;
- # Patch: the vertical formatter does not line up lines whose lengths
- # exactly equal the available line length because of allowances
- # that must be made for side comments. Therefore, the number of
- # available columns is reduced by 1 character.
- $columns -= 1;
- return $columns;
-}
+ foreach my $line ( 0 .. $max_line ) {
+ my $ibeg = $ri_first->[$line];
+ my $iend = $ri_last->[$line];
+ $last_vertical_alignment_before_index = -1;
+ $vert_last_nonblank_type = '';
+ $vert_last_nonblank_token = '';
+ $vert_last_nonblank_block_type = '';
-sub maximum_number_of_fields {
+ # look at each token in this output line..
+ my $level_beg = $levels_to_go[$ibeg];
+ foreach my $i ( $ibeg .. $iend ) {
+ my $alignment_type = '';
+ my $type = $types_to_go[$i];
+ my $block_type = $block_type_to_go[$i];
+ my $token = $tokens_to_go[$i];
- # how many fields will fit in the available space?
- my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
- my $max_pairs = int( $columns / $pair_width );
- my $number_of_fields = $max_pairs * 2;
- if ( $odd_or_even == 1
- && $max_pairs * $pair_width + $max_width <= $columns )
- {
- $number_of_fields++;
- }
- return $number_of_fields;
-}
+ # do not align tokens at lower level then start of line
+ # except for side comments
+ if ( $levels_to_go[$i] < $levels_to_go[$ibeg]
+ && $types_to_go[$i] ne '#' )
+ {
+ $ralignment_type_to_go->[$i] = '';
+ next;
+ }
-sub compactify_table {
+ #--------------------------------------------------------
+ # First see if we want to align BEFORE this token
+ #--------------------------------------------------------
- # given a table with a certain number of fields and a certain number
- # of lines, see if reducing the number of fields will make it look
- # better.
- my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
- if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
- my $min_fields;
+ # The first possible token that we can align before
+ # is index 2 because: 1) it doesn't normally make sense to
+ # align before the first token and 2) the second
+ # token must be a blank if we are to align before
+ # the third
+ if ( $i < $ibeg + 2 ) { }
- for (
- $min_fields = $number_of_fields ;
- $min_fields >= $odd_or_even
- && $min_fields * $formatted_lines >= $item_count ;
- $min_fields -= $odd_or_even
- )
- {
- $number_of_fields = $min_fields;
- }
- }
- return $number_of_fields;
-}
+ # must follow a blank token
+ elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
-sub set_ragged_breakpoints {
+ # align a side comment --
+ elsif ( $type eq '#' ) {
- # Set breakpoints in a list that cannot be formatted nicely as a
- # table.
- my ( $self, $ri_term_comma, $ri_ragged_break_list ) = @_;
+ my $KK = $K_to_go[$i];
+ my $sc_type = $rspecial_side_comment_type->{$KK};
- my $break_count = 0;
- foreach ( @{$ri_ragged_break_list} ) {
- my $j = $ri_term_comma->[$_];
- if ($j) {
- $self->set_forced_breakpoint($j);
- $break_count++;
- }
- }
- return $break_count;
-}
+ unless (
-sub copy_old_breakpoints {
- my ( $self, $i_first_comma, $i_last_comma ) = @_;
- for my $i ( $i_first_comma .. $i_last_comma ) {
- if ( $old_breakpoint_to_go[$i] ) {
- $self->set_forced_breakpoint($i);
- }
- }
- return;
-}
+ # it is any specially marked side comment
+ $sc_type
-sub set_nobreaks {
- my ( $self, $i, $j ) = @_;
- if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
+ # or it is a static side comment
+ || ( $rOpts->{'static-side-comments'}
+ && $token =~ /$static_side_comment_pattern/ )
- 0 && do {
- my ( $a, $b, $c ) = caller();
- my $forced_breakpoint_count = get_forced_breakpoint_count();
- print STDOUT
-"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
- };
+ # or a closing side comment
+ || ( $vert_last_nonblank_block_type
+ && $token =~
+ /$closing_side_comment_prefix_pattern/ )
+ )
+ {
+ $alignment_type = $type;
+ } ## Example of a static side comment
+ }
- @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
- }
+ # otherwise, do not align two in a row to create a
+ # blank field
+ elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
- # shouldn't happen; non-critical error
- else {
- 0 && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
- "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
- };
- }
- return;
-}
+ # align before one of these keywords
+ # (within a line, since $i>1)
+ elsif ( $type eq 'k' ) {
-{ ## begin closure set_forced_breakpoint
+ # /^(if|unless|and|or|eq|ne)$/
+ if ( $is_vertical_alignment_keyword{$token} ) {
+ $alignment_type = $token;
+ }
+ }
- my $forced_breakpoint_count;
- my $forced_breakpoint_undo_count;
- my @forced_breakpoint_undo_stack;
- my $index_max_forced_break;
+ # align before one of these types..
+ # Note: add '.' after new vertical aligner is operational
+ elsif ( $is_vertical_alignment_type{$type}
+ && !$is_not_vertical_alignment_token{$token} )
+ {
+ $alignment_type = $token;
- sub initialize_forced_breakpoint_vars {
- $forced_breakpoint_count = 0;
- $index_max_forced_break = UNDEFINED_INDEX;
- $forced_breakpoint_undo_count = 0;
- @forced_breakpoint_undo_stack = ();
- return;
- }
+ # Do not align a terminal token. Although it might
+ # occasionally look ok to do this, this has been found to be
+ # a good general rule. The main problems are:
+ # (1) that the terminal token (such as an = or :) might get
+ # moved far to the right where it is hard to see because
+ # nothing follows it, and
+ # (2) doing so may prevent other good alignments.
+ # Current exceptions are && and || and =>
+ if ( $i == $iend || $i >= $i_terminal ) {
+ $alignment_type = ""
+ unless ( $is_terminal_alignment_type{$type} );
+ }
- sub get_forced_breakpoint_count {
- return $forced_breakpoint_count;
- }
+ # Do not align leading ': (' or '. ('. This would prevent
+ # alignment in something like the following:
+ # $extra_space .=
+ # ( $input_line_number < 10 ) ? " "
+ # : ( $input_line_number < 100 ) ? " "
+ # : "";
+ # or
+ # $code =
+ # ( $case_matters ? $accessor : " lc($accessor) " )
+ # . ( $yesno ? " eq " : " ne " )
- sub get_forced_breakpoint_undo_count {
- return $forced_breakpoint_undo_count;
- }
+ # Also, do not align a ( following a leading ? so we can
+ # align something like this:
+ # $converter{$_}->{ushortok} =
+ # $PDL::IO::Pic::biggrays
+ # ? ( m/GIF/ ? 0 : 1 )
+ # : ( m/GIF|RAST|IFF/ ? 0 : 1 );
+ if ( $i == $ibeg + 2
+ && $types_to_go[$ibeg] =~ /^[\.\:\?]$/
+ && $types_to_go[ $i - 1 ] eq 'b' )
+ {
+ $alignment_type = "";
+ }
- sub get_index_max_forced_break {
- return $index_max_forced_break;
- }
+ # Certain tokens only align at the same level as the
+ # initial line level
+ if ( $is_low_level_alignment_token{$token}
+ && $levels_to_go[$i] != $level_beg )
+ {
+ $alignment_type = "";
+ }
- sub set_fake_breakpoint {
+ # For a paren after keyword, only align something like this:
+ # if ( $a ) { &a }
+ # elsif ( $b ) { &b }
+ if ( $token eq '(' ) {
- # Just bump up the breakpoint count as a signal that there are breaks.
- # This is useful if we have breaks but may want to postpone deciding where
- # to make them.
- $forced_breakpoint_count++;
- return;
- }
+ if ( $vert_last_nonblank_type eq 'k' ) {
+ $alignment_type = ""
+ unless $vert_last_nonblank_token =~
+ /^(if|unless|elsif)$/;
+ }
+ }
+
+ # be sure the alignment tokens are unique
+ # This didn't work well: reason not determined
+ # if ($token ne $type) {$alignment_type .= $type}
+ }
+
+ # NOTE: This is deactivated because it causes the previous
+ # if/elsif alignment to fail
+ #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
+ #{ $alignment_type = $type; }
+
+ if ($alignment_type) {
+ $last_vertical_alignment_before_index = $i;
+ }
- my $DEBUG_FORCE;
+ #--------------------------------------------------------
+ # Next see if we want to align AFTER the previous nonblank
+ #--------------------------------------------------------
- sub set_forced_breakpoint {
- my ( $self, $i ) = @_;
+ # We want to line up ',' and interior ';' tokens, with the added
+ # space AFTER these tokens. (Note: interior ';' is included
+ # because it may occur in short blocks).
+ if (
- return unless defined $i && $i >= 0;
+ # we haven't already set it
+ !$alignment_type
- # no breaks between welded tokens
- return if ( $self->weld_len_right_to_go($i) );
+ # and its not the first token of the line
+ && ( $i > $ibeg )
- # when called with certain tokens, use bond strengths to decide
- # if we break before or after it
- my $token = $tokens_to_go[$i];
+ # and it follows a blank
+ && $types_to_go[ $i - 1 ] eq 'b'
- if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
- if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
- }
+ # and previous token IS one of these:
+ && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
- # breaks are forced before 'if' and 'unless'
- elsif ( $is_if_unless{$token} ) { $i-- }
+ # and it's NOT one of these
+ && ( $type !~ /^[b\#\)\]\}]$/ )
- if ( $i >= 0 && $i <= $max_index_to_go ) {
- my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
+ # then go ahead and align
+ )
- $DEBUG_FORCE && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
- };
+ {
+ $alignment_type = $vert_last_nonblank_type;
+ }
- ######################################################################
- # NOTE: if we call set_closing_breakpoint below it will then call this
- # routing back. So there is the possibility of an infinite loop if a
- # programming error is made. As a precaution, I have added a check on
- # the forced_breakpoint flag, so that we won't keep trying to set it.
- # That will give additional protection against a loop.
- ######################################################################
- if ( $i_nonblank >= 0
- && $nobreak_to_go[$i_nonblank] == 0
- && !$forced_breakpoint_to_go[$i_nonblank] )
- {
- $forced_breakpoint_to_go[$i_nonblank] = 1;
+ #--------------------------------------------------------
+ # Undo alignment in special cases
+ #--------------------------------------------------------
+ if ($alignment_type) {
- if ( $i_nonblank > $index_max_forced_break ) {
- $index_max_forced_break = $i_nonblank;
+ # do not align the opening brace of an anonymous sub
+ if ( $token eq '{' && $block_type =~ /$ASUB_PATTERN/ ) {
+ $alignment_type = "";
+ }
}
- $forced_breakpoint_count++;
- $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ]
- = $i_nonblank;
- # if we break at an opening container..break at the closing
- if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
- $self->set_closing_breakpoint($i_nonblank);
+ #--------------------------------------------------------
+ # then store the value
+ #--------------------------------------------------------
+ $ralignment_type_to_go->[$i] = $alignment_type;
+ if ( $type ne 'b' ) {
+ $vert_last_nonblank_type = $type;
+ $vert_last_nonblank_token = $token;
+ $vert_last_nonblank_block_type = $block_type;
}
}
}
- return;
- }
-
- sub clear_breakpoint_undo_stack {
- my ($self) = @_;
- $forced_breakpoint_undo_count = 0;
- return;
+ return $ralignment_type_to_go;
}
+} ## end closure set_vertical_alignment_markers
- my $DEBUG_UNDOBP;
-
- sub undo_forced_breakpoint_stack {
+sub get_seqno {
- my ( $self, $i_start ) = @_;
- if ( $i_start < 0 ) {
- $i_start = 0;
- my ( $a, $b, $c ) = caller();
- warning(
-"Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
- );
- }
+ # get opening and closing sequence numbers of a token for the vertical
+ # aligner. Assign qw quotes a value to allow qw opening and closing tokens
+ # to be treated somewhat like opening and closing tokens for stacking
+ # tokens by the vertical aligner.
+ my ( $self, $ii, $ending_in_quote ) = @_;
- while ( $forced_breakpoint_undo_count > $i_start ) {
- my $i =
- $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
- if ( $i >= 0 && $i <= $max_index_to_go ) {
- $forced_breakpoint_to_go[$i] = 0;
- $forced_breakpoint_count--;
+ my $rLL = $self->[_rLL_];
+ my $this_batch = $self->[_this_batch_];
+ my $rK_to_go = $this_batch->[_rK_to_go_];
- $DEBUG_UNDOBP && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
- };
- }
+ my $KK = $rK_to_go->[$ii];
+ my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
- # shouldn't happen, but not a critical error
- else {
- $DEBUG_UNDOBP && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
- };
+ if ( $rLL->[$KK]->[_TYPE_] eq 'q' ) {
+ my $SEQ_QW = -1;
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ if ( $ii > 0 ) {
+ $seqno = $SEQ_QW if ( $token =~ /^qw\s*[\(\{\[]/ );
+ }
+ else {
+ if ( !$ending_in_quote ) {
+ $seqno = $SEQ_QW if ( $token =~ /[\)\}\]]$/ );
}
}
- return;
}
-} ## end closure set_forced_breakpoint
+ return ($seqno);
+}
-{ ## begin closure recombine_breakpoints
+sub undo_ci {
- # This routine is called once per batch to see if it would be better
- # to combine some of the lines into which the batch has been broken.
+ # Undo continuation indentation in certain sequences
+ # For example, we can undo continuation indentation in sort/map/grep chains
+ # my $dat1 = pack( "n*",
+ # map { $_, $lookup->{$_} }
+ # sort { $a <=> $b }
+ # grep { $lookup->{$_} ne $default } keys %$lookup );
+ # To align the map/sort/grep keywords like this:
+ # my $dat1 = pack( "n*",
+ # map { $_, $lookup->{$_} }
+ # sort { $a <=> $b }
+ # grep { $lookup->{$_} ne $default } keys %$lookup );
+ my ( $self, $ri_first, $ri_last ) = @_;
+ my ( $line_1, $line_2, $lev_last );
+ my $this_line_is_semicolon_terminated;
+ my $max_line = @{$ri_first} - 1;
- my %is_amp_amp;
- my %is_ternary;
- my %is_math_op;
- my %is_plus_minus;
- my %is_mult_div;
+ # looking at each line of this batch..
+ # We are looking at leading tokens and looking for a sequence
+ # all at the same level and higher level than enclosing lines.
+ foreach my $line ( 0 .. $max_line ) {
- BEGIN {
+ my $ibeg = $ri_first->[$line];
+ my $lev = $levels_to_go[$ibeg];
+ if ( $line > 0 ) {
- my @q;
- @q = qw( && || );
- @is_amp_amp{@q} = (1) x scalar(@q);
+ # if we have started a chain..
+ if ($line_1) {
- @q = qw( ? : );
- @is_ternary{@q} = (1) x scalar(@q);
+ # see if it continues..
+ if ( $lev == $lev_last ) {
+ if ( $types_to_go[$ibeg] eq 'k'
+ && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
+ {
- @q = qw( + - * / );
- @is_math_op{@q} = (1) x scalar(@q);
+ # chain continues...
+ # check for chain ending at end of a statement
+ if ( $line == $max_line ) {
+
+ # see of this line ends a statement
+ my $iend = $ri_last->[$line];
+ $this_line_is_semicolon_terminated =
+ $types_to_go[$iend] eq ';'
+
+ # with possible side comment
+ || ( $types_to_go[$iend] eq '#'
+ && $iend - $ibeg >= 2
+ && $types_to_go[ $iend - 2 ] eq ';'
+ && $types_to_go[ $iend - 1 ] eq 'b' );
+ }
+ $line_2 = $line if ($this_line_is_semicolon_terminated);
+ }
+ else {
+
+ # kill chain
+ $line_1 = undef;
+ }
+ }
+ elsif ( $lev < $lev_last ) {
+
+ # chain ends with previous line
+ $line_2 = $line - 1;
+ }
+ elsif ( $lev > $lev_last ) {
+
+ # kill chain
+ $line_1 = undef;
+ }
+
+ # undo the continuation indentation if a chain ends
+ if ( defined($line_2) && defined($line_1) ) {
+ my $continuation_line_count = $line_2 - $line_1 + 1;
+ @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ] =
+ (0) x ($continuation_line_count)
+ if ( $continuation_line_count >= 0 );
+ @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
+ = @reduced_spaces_to_go[ @{$ri_first}
+ [ $line_1 .. $line_2 ] ];
+ $line_1 = undef;
+ }
+ }
+
+ # not in a chain yet..
+ else {
+
+ # look for start of a new sort/map/grep chain
+ if ( $lev > $lev_last ) {
+ if ( $types_to_go[$ibeg] eq 'k'
+ && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
+ {
+ $line_1 = $line;
+ }
+ }
+ }
+ }
+ $lev_last = $lev;
+ }
+ return;
+}
+
+{ ## begin closure set_logical_padding
+ my %is_math_op;
- @q = qw( + - );
- @is_plus_minus{@q} = (1) x scalar(@q);
+ BEGIN {
- @q = qw( * / );
- @is_mult_div{@q} = (1) x scalar(@q);
+ my @q = qw( + - * / );
+ @is_math_op{@q} = (1) x scalar(@q);
}
- sub Debug_dump_breakpoints {
+ sub set_logical_padding {
- # Debug routine to dump current breakpoints...not normally called
- # We are given indexes to the current lines:
- # $ri_beg = ref to array of BEGinning indexes of each line
- # $ri_end = ref to array of ENDing indexes of each line
- my ( $self, $ri_beg, $ri_end, $msg ) = @_;
- print STDERR "----Dumping breakpoints from: $msg----\n";
- for my $n ( 0 .. @{$ri_end} - 1 ) {
- my $ibeg = $ri_beg->[$n];
- my $iend = $ri_end->[$n];
- my $text = "";
- foreach my $i ( $ibeg .. $iend ) {
- $text .= $tokens_to_go[$i];
- }
- print STDERR "$n ($ibeg:$iend) $text\n";
- }
- print STDERR "----\n";
- return;
- }
+ # Look at a batch of lines and see if extra padding can improve the
+ # alignment when there are certain leading operators. Here is an
+ # example, in which some extra space is introduced before
+ # '( $year' to make it line up with the subsequent lines:
+ #
+ # if ( ( $Year < 1601 )
+ # || ( $Year > 2899 )
+ # || ( $EndYear < 1601 )
+ # || ( $EndYear > 2899 ) )
+ # {
+ # &Error_OutOfRange;
+ # }
+ #
+ my ( $self, $ri_first, $ri_last, $peak_batch_size, $starting_in_quote )
+ = @_;
+ my $max_line = @{$ri_first} - 1;
- sub delete_one_line_semicolons {
+ my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
+ $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
- my ( $self, $ri_beg, $ri_end ) = @_;
- my $rLL = $self->[_rLL_];
- my $K_opening_container = $self->[_K_opening_container_];
+ # looking at each line of this batch..
+ foreach my $line ( 0 .. $max_line - 1 ) {
- # Walk down the lines of this batch and delete any semicolons
- # terminating one-line blocks;
- my $nmax = @{$ri_end} - 1;
+ # see if the next line begins with a logical operator
+ $ibeg = $ri_first->[$line];
+ $iend = $ri_last->[$line];
+ $ibeg_next = $ri_first->[ $line + 1 ];
+ $tok_next = $tokens_to_go[$ibeg_next];
+ $type_next = $types_to_go[$ibeg_next];
- foreach my $n ( 0 .. $nmax ) {
- my $i_beg = $ri_beg->[$n];
- my $i_e = $ri_end->[$n];
- my $K_beg = $K_to_go[$i_beg];
- my $K_e = $K_to_go[$i_e];
- my $K_end = $K_e;
- my $type_end = $rLL->[$K_end]->[_TYPE_];
- if ( $type_end eq '#' ) {
- $K_end = $self->K_previous_nonblank($K_end);
- if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
- }
+ $has_leading_op_next = ( $tok_next =~ /^\w/ )
+ ? $is_chain_operator{$tok_next} # + - * / : ? && ||
+ : $is_chain_operator{$type_next}; # and, or
- # we are looking for a line ending in closing brace
+ next unless ($has_leading_op_next);
+
+ # next line must not be at lesser depth
next
- unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
+ if ( $nesting_depth_to_go[$ibeg] >
+ $nesting_depth_to_go[$ibeg_next] );
- # ...and preceded by a semicolon on the same line
- my $K_semicolon = $self->K_previous_nonblank($K_end);
- next unless defined($K_semicolon);
- my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
- next if ( $i_semicolon <= $i_beg );
- next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
+ # identify the token in this line to be padded on the left
+ $ipad = undef;
- # safety check - shouldn't happen
- if ( $types_to_go[$i_semicolon] ne ';' ) {
- Fault("unexpected type looking for semicolon, ignoring");
- next;
- }
+ # handle lines at same depth...
+ if ( $nesting_depth_to_go[$ibeg] ==
+ $nesting_depth_to_go[$ibeg_next] )
+ {
- # ... with the corresponding opening brace on the same line
- my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
- my $K_opening = $K_opening_container->{$type_sequence};
- next unless ( defined($K_opening) );
- my $i_opening = $i_beg + ( $K_opening - $K_beg );
- next if ( $i_opening < $i_beg );
+ # if this is not first line of the batch ...
+ if ( $line > 0 ) {
- # ... and only one semicolon between these braces
- my $semicolon_count = 0;
- foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
- if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
- $semicolon_count++;
- last;
- }
- }
- next if ($semicolon_count);
+ # and we have leading operator..
+ next if $has_leading_op;
- # ...ok, then make the semicolon invisible
- $tokens_to_go[$i_semicolon] = "";
- $token_lengths_to_go[$i_semicolon] = 0;
- $rLL->[$K_semicolon]->[_TOKEN_] = "";
- $rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
- }
- return;
- }
+ # Introduce padding if..
+ # 1. the previous line is at lesser depth, or
+ # 2. the previous line ends in an assignment
+ # 3. the previous line ends in a 'return'
+ # 4. the previous line ends in a comma
+ # Example 1: previous line at lesser depth
+ # if ( ( $Year < 1601 ) # <- we are here but
+ # || ( $Year > 2899 ) # list has not yet
+ # || ( $EndYear < 1601 ) # collapsed vertically
+ # || ( $EndYear > 2899 ) )
+ # {
+ #
+ # Example 2: previous line ending in assignment:
+ # $leapyear =
+ # $year % 4 ? 0 # <- We are here
+ # : $year % 100 ? 1
+ # : $year % 400 ? 0
+ # : 1;
+ #
+ # Example 3: previous line ending in comma:
+ # push @expr,
+ # /test/ ? undef
+ # : eval($_) ? 1
+ # : eval($_) ? 1
+ # : 0;
- sub unmask_phantom_semicolons {
+ # be sure levels agree (do not indent after an indented 'if')
+ next
+ if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
- my ( $self, $ri_beg, $ri_end ) = @_;
+ # allow padding on first line after a comma but only if:
+ # (1) this is line 2 and
+ # (2) there are at more than three lines and
+ # (3) lines 3 and 4 have the same leading operator
+ # These rules try to prevent padding within a long
+ # comma-separated list.
+ my $ok_comma;
+ if ( $types_to_go[$iendm] eq ','
+ && $line == 1
+ && $max_line > 2 )
+ {
+ my $ibeg_next_next = $ri_first->[ $line + 2 ];
+ my $tok_next_next = $tokens_to_go[$ibeg_next_next];
+ $ok_comma = $tok_next_next eq $tok_next;
+ }
- # Walk down the lines of this batch and unmask any invisible line-ending
- # semicolons. They were placed by sub respace_tokens but we only now
- # know if we actually need them.
- my $rLL = $self->[_rLL_];
+ next
+ unless (
+ $is_assignment{ $types_to_go[$iendm] }
+ || $ok_comma
+ || ( $nesting_depth_to_go[$ibegm] <
+ $nesting_depth_to_go[$ibeg] )
+ || ( $types_to_go[$iendm] eq 'k'
+ && $tokens_to_go[$iendm] eq 'return' )
+ );
- my $nmax = @{$ri_end} - 1;
- foreach my $n ( 0 .. $nmax ) {
+ # we will add padding before the first token
+ $ipad = $ibeg;
+ }
- my $i = $ri_end->[$n];
- if ( $types_to_go[$i] eq ';' && $tokens_to_go[$i] eq '' ) {
+ # for first line of the batch..
+ else {
- my $tok = ';';
- my $tok_len = 1;
- if ( $want_left_space{';'} != WS_NO ) {
- $tok = ' ;';
- $tok_len = 2;
- }
- $tokens_to_go[$i] = $tok;
- $token_lengths_to_go[$i] = $tok_len;
- my $KK = $K_to_go[$i];
- $rLL->[$KK]->[_TOKEN_] = $tok;
- $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
- my $line_number = 1 + $self->get_old_line_index($KK);
- $self->note_added_semicolon($line_number);
- }
- }
- return;
- }
+ # WARNING: Never indent if first line is starting in a
+ # continued quote, which would change the quote.
+ next if $starting_in_quote;
- sub recombine_breakpoints {
+ # if this is text after closing '}'
+ # then look for an interior token to pad
+ if ( $types_to_go[$ibeg] eq '}' ) {
- # sub set_continuation_breaks is very liberal in setting line breaks
- # for long lines, always setting breaks at good breakpoints, even
- # when that creates small lines. Sometimes small line fragments
- # are produced which would look better if they were combined.
- # That's the task of this routine.
- #
- # We are given indexes to the current lines:
- # $ri_beg = ref to array of BEGinning indexes of each line
- # $ri_end = ref to array of ENDing indexes of each line
- my ( $self, $ri_beg, $ri_end ) = @_;
+ }
- my $rOpts_short_concatenation_item_length =
- $rOpts->{'short-concatenation-item-length'};
- my $rOpts_break_at_old_semicolon_breakpoints =
- $rOpts->{'break-at-old-semicolon-breakpoints'};
+ # otherwise, we might pad if it looks really good
+ else {
+
+ # we might pad token $ibeg, so be sure that it
+ # is at the same depth as the next line.
+ next
+ if ( $nesting_depth_to_go[$ibeg] !=
+ $nesting_depth_to_go[$ibeg_next] );
+
+ # We can pad on line 1 of a statement if at least 3
+ # lines will be aligned. Otherwise, it
+ # can look very confusing.
+
+ # We have to be careful not to pad if there are too few
+ # lines. The current rule is:
+ # (1) in general we require at least 3 consecutive lines
+ # with the same leading chain operator token,
+ # (2) but an exception is that we only require two lines
+ # with leading colons if there are no more lines. For example,
+ # the first $i in the following snippet would get padding
+ # by the second rule:
+ #
+ # $i == 1 ? ( "First", "Color" )
+ # : $i == 2 ? ( "Then", "Rarity" )
+ # : ( "Then", "Name" );
- # Make a list of all good joining tokens between the lines
- # n-1 and n.
- my @joint;
- my $nmax = @{$ri_end} - 1;
- for my $n ( 1 .. $nmax ) {
- my $ibeg_1 = $ri_beg->[ $n - 1 ];
- my $iend_1 = $ri_end->[ $n - 1 ];
- my $iend_2 = $ri_end->[$n];
- my $ibeg_2 = $ri_beg->[$n];
+ if ( $max_line > 1 ) {
+ my $leading_token = $tokens_to_go[$ibeg_next];
+ my $tokens_differ;
- my ( $itok, $itokp, $itokm );
+ # never indent line 1 of a '.' series because
+ # previous line is most likely at same level.
+ # TODO: we should also look at the leasing_spaces
+ # of the last output line and skip if it is same
+ # as this line.
+ next if ( $leading_token eq '.' );
- foreach my $itest ( $iend_1, $ibeg_2 ) {
- my $type = $types_to_go[$itest];
- if ( $is_math_op{$type}
- || $is_amp_amp{$type}
- || $is_assignment{$type}
- || $type eq ':' )
- {
- $itok = $itest;
+ my $count = 1;
+ foreach my $l ( 2 .. 3 ) {
+ last if ( $line + $l > $max_line );
+ my $ibeg_next_next = $ri_first->[ $line + $l ];
+ if ( $tokens_to_go[$ibeg_next_next] ne
+ $leading_token )
+ {
+ $tokens_differ = 1;
+ last;
+ }
+ $count++;
+ }
+ next if ($tokens_differ);
+ next if ( $count < 3 && $leading_token ne ':' );
+ $ipad = $ibeg;
+ }
+ else {
+ next;
+ }
+ }
}
}
- $joint[$n] = [$itok];
- }
- my $more_to_do = 1;
+ # find interior token to pad if necessary
+ if ( !defined($ipad) ) {
- # We keep looping over all of the lines of this batch
- # until there are no more possible recombinations
- my $nmax_last = @{$ri_end};
- my $reverse = 0;
- while ($more_to_do) {
- my $n_best = 0;
- my $bs_best;
- my $nmax = @{$ri_end} - 1;
+ for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
- # Safety check for infinite loop
- unless ( $nmax < $nmax_last ) {
+ # find any unclosed container
+ next
+ unless ( $type_sequence_to_go[$i]
+ && $self->mate_index_to_go($i) > $iend );
- # Shouldn't happen because splice below decreases nmax on each
- # pass.
- Fault("Program bug-infinite loop in recombine breakpoints\n");
+ # find next nonblank token to pad
+ $ipad = $inext_to_go[$i];
+ last if ( $ipad > $iend );
+ }
+ last unless $ipad;
}
- $nmax_last = $nmax;
- $more_to_do = 0;
- my $skip_Section_3;
- my $leading_amp_count = 0;
- my $this_line_is_semicolon_terminated;
-
- # loop over all remaining lines in this batch
- for my $iter ( 1 .. $nmax ) {
-
- # alternating sweep direction gives symmetric results
- # for recombining lines which exceed the line length
- # such as eval {{{{.... }}}}
- my $n;
- if ($reverse) { $n = 1 + $nmax - $iter; }
- else { $n = $iter }
-
- #----------------------------------------------------------
- # If we join the current pair of lines,
- # line $n-1 will become the left part of the joined line
- # line $n will become the right part of the joined line
- #
- # Here are Indexes of the endpoint tokens of the two lines:
- #
- # -----line $n-1--- | -----line $n-----
- # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
- # ^
- # |
- # We want to decide if we should remove the line break
- # between the tokens at $iend_1 and $ibeg_2
- #
- # We will apply a number of ad-hoc tests to see if joining
- # here will look ok. The code will just issue a 'next'
- # command if the join doesn't look good. If we get through
- # the gauntlet of tests, the lines will be recombined.
- #----------------------------------------------------------
- #
- # beginning and ending tokens of the lines we are working on
- my $ibeg_1 = $ri_beg->[ $n - 1 ];
- my $iend_1 = $ri_end->[ $n - 1 ];
- my $iend_2 = $ri_end->[$n];
- my $ibeg_2 = $ri_beg->[$n];
- my $ibeg_nmax = $ri_beg->[$nmax];
- # combined line cannot be too long
- my $excess =
- $self->excess_line_length( $ibeg_1, $iend_2, 1, 1 );
- next if ( $excess > 0 );
+ # We cannot pad the first leading token of a file because
+ # it could cause a bug in which the starting indentation
+ # level is guessed incorrectly each time the code is run
+ # though perltidy, thus causing the code to march off to
+ # the right. For example, the following snippet would have
+ # this problem:
- my $type_iend_1 = $types_to_go[$iend_1];
- my $type_iend_2 = $types_to_go[$iend_2];
- my $type_ibeg_1 = $types_to_go[$ibeg_1];
- my $type_ibeg_2 = $types_to_go[$ibeg_2];
+## ov_method mycan( $package, '(""' ), $package
+## or ov_method mycan( $package, '(0+' ), $package
+## or ov_method mycan( $package, '(bool' ), $package
+## or ov_method mycan( $package, '(nomethod' ), $package;
- # terminal token of line 2 if any side comment is ignored:
- my $iend_2t = $iend_2;
- my $type_iend_2t = $type_iend_2;
+ # If this snippet is within a block this won't happen
+ # unless the user just processes the snippet alone within
+ # an editor. In that case either the user will see and
+ # fix the problem or it will be corrected next time the
+ # entire file is processed with perltidy.
+ next if ( $ipad == 0 && $peak_batch_size <= 1 );
- # some beginning indexes of other lines, which may not exist
- my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
- my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
- my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
+## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
+## IT DID MORE HARM THAN GOOD
+## ceil(
+## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
+## / $upem
+## ),
+##? # do not put leading padding for just 2 lines of math
+##? if ( $ipad == $ibeg
+##? && $line > 0
+##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
+##? && $is_math_op{$type_next}
+##? && $line + 2 <= $max_line )
+##? {
+##? my $ibeg_next_next = $ri_first->[ $line + 2 ];
+##? my $type_next_next = $types_to_go[$ibeg_next_next];
+##? next if !$is_math_op{$type_next_next};
+##? }
- my $bs_tweak = 0;
+ # next line must not be at greater depth
+ my $iend_next = $ri_last->[ $line + 1 ];
+ next
+ if ( $nesting_depth_to_go[ $iend_next + 1 ] >
+ $nesting_depth_to_go[$ipad] );
- #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
- # $nesting_depth_to_go[$ibeg_1] );
+ # lines must be somewhat similar to be padded..
+ my $inext_next = $inext_to_go[$ibeg_next];
+ my $type = $types_to_go[$ipad];
+ my $type_next = $types_to_go[ $ipad + 1 ];
- 0 && do {
- print STDERR
-"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
- };
+ # see if there are multiple continuation lines
+ my $logical_continuation_lines = 1;
+ if ( $line + 2 <= $max_line ) {
+ my $leading_token = $tokens_to_go[$ibeg_next];
+ my $ibeg_next_next = $ri_first->[ $line + 2 ];
+ if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
+ && $nesting_depth_to_go[$ibeg_next] eq
+ $nesting_depth_to_go[$ibeg_next_next] )
+ {
+ $logical_continuation_lines++;
+ }
+ }
- # If line $n is the last line, we set some flags and
- # do any special checks for it
- if ( $n == $nmax ) {
+ # see if leading types match
+ my $types_match = $types_to_go[$inext_next] eq $type;
+ my $matches_without_bang;
- next
- if ( $type_ibeg_2 eq ';'
- && $rOpts_break_at_old_semicolon_breakpoints );
+ # if first line has leading ! then compare the following token
+ if ( !$types_match && $type eq '!' ) {
+ $types_match = $matches_without_bang =
+ $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
+ }
- # a terminal '{' should stay where it is
- # unless preceded by a fat comma
- next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' );
+ if (
- if ( $type_iend_2 eq '#'
- && $iend_2 - $ibeg_2 >= 2
- && $types_to_go[ $iend_2 - 1 ] eq 'b' )
- {
- $iend_2t = $iend_2 - 2;
- $type_iend_2t = $types_to_go[$iend_2t];
- }
+ # either we have multiple continuation lines to follow
+ # and we are not padding the first token
+ ( $logical_continuation_lines > 1 && $ipad > 0 )
- $this_line_is_semicolon_terminated = $type_iend_2t eq ';';
- }
+ # or..
+ || (
- #----------------------------------------------------------
- # Recombine Section 0:
- # Examine the special token joining this line pair, if any.
- # Put as many tests in this section to avoid duplicate code and
- # to make formatting independent of whether breaks are to the
- # left or right of an operator.
- #----------------------------------------------------------
+ # types must match
+ $types_match
- my ($itok) = @{ $joint[$n] };
- if ($itok) {
+ # and keywords must match if keyword
+ && !(
+ $type eq 'k'
+ && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
+ )
+ )
+ )
+ {
- # FIXME: Patch - may not be necessary
- my $iend_1 =
- $type_iend_1 eq 'b'
- ? $iend_1 - 1
- : $iend_1;
+ #----------------------begin special checks--------------
+ #
+ # SPECIAL CHECK 1:
+ # A check is needed before we can make the pad.
+ # If we are in a list with some long items, we want each
+ # item to stand out. So in the following example, the
+ # first line beginning with '$casefold->' would look good
+ # padded to align with the next line, but then it
+ # would be indented more than the last line, so we
+ # won't do it.
+ #
+ # ok(
+ # $casefold->{code} eq '0041'
+ # && $casefold->{status} eq 'C'
+ # && $casefold->{mapping} eq '0061',
+ # 'casefold 0x41'
+ # );
+ #
+ # Note:
+ # It would be faster, and almost as good, to use a comma
+ # count, and not pad if comma_count > 1 and the previous
+ # line did not end with a comma.
+ #
+ my $ok_to_pad = 1;
- my $iend_2 =
- $type_iend_2 eq 'b'
- ? $iend_2 - 1
- : $iend_2;
- ## END PATCH
+ my $ibg = $ri_first->[ $line + 1 ];
+ my $depth = $nesting_depth_to_go[ $ibg + 1 ];
- my $type = $types_to_go[$itok];
+ # just use simplified formula for leading spaces to avoid
+ # needless sub calls
+ my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
- if ( $type eq ':' ) {
+ # look at each line beyond the next ..
+ my $l = $line + 1;
+ foreach my $ltest ( $line + 2 .. $max_line ) {
+ $l = $ltest;
+ my $ibg = $ri_first->[$l];
- # do not join at a colon unless it disobeys the break request
- if ( $itok eq $iend_1 ) {
- next unless $want_break_before{$type};
- }
- else {
- $leading_amp_count++;
- next if $want_break_before{$type};
- }
- } ## end if ':'
+ # quit looking at the end of this container
+ last
+ if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
+ || ( $nesting_depth_to_go[$ibg] < $depth );
- # handle math operators + - * /
- elsif ( $is_math_op{$type} ) {
+ # cannot do the pad if a later line would be
+ # outdented more
+ if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
+ $ok_to_pad = 0;
+ last;
+ }
+ }
- # Combine these lines if this line is a single
- # number, or if it is a short term with same
- # operator as the previous line. For example, in
- # the following code we will combine all of the
- # short terms $A, $B, $C, $D, $E, $F, together
- # instead of leaving them one per line:
- # my $time =
- # $A * $B * $C * $D * $E * $F *
- # ( 2. * $eps * $sigma * $area ) *
- # ( 1. / $tcold**3 - 1. / $thot**3 );
+ # don't pad if we end in a broken list
+ if ( $l == $max_line ) {
+ my $i2 = $ri_last->[$l];
+ if ( $types_to_go[$i2] eq '#' ) {
+ my $i1 = $ri_first->[$l];
+ next if $self->terminal_type_i( $i1, $i2 ) eq ',';
+ }
+ }
- # This can be important in math-intensive code.
+ # SPECIAL CHECK 2:
+ # a minus may introduce a quoted variable, and we will
+ # add the pad only if this line begins with a bare word,
+ # such as for the word 'Button' here:
+ # [
+ # Button => "Print letter \"~$_\"",
+ # -command => [ sub { print "$_[0]\n" }, $_ ],
+ # -accelerator => "Meta+$_"
+ # ];
+ #
+ # On the other hand, if 'Button' is quoted, it looks best
+ # not to pad:
+ # [
+ # 'Button' => "Print letter \"~$_\"",
+ # -command => [ sub { print "$_[0]\n" }, $_ ],
+ # -accelerator => "Meta+$_"
+ # ];
+ if ( $types_to_go[$ibeg_next] eq 'm' ) {
+ $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
+ }
- my $good_combo;
+ next unless $ok_to_pad;
- my $itokp = min( $inext_to_go[$itok], $iend_2 );
- my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
- my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
- my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
+ #----------------------end special check---------------
- # check for a number on the right
- if ( $types_to_go[$itokp] eq 'n' ) {
+ my $length_1 = total_line_length( $ibeg, $ipad - 1 );
+ my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
+ $pad_spaces = $length_2 - $length_1;
- # ok if nothing else on right
- if ( $itokp == $iend_2 ) {
- $good_combo = 1;
- }
- else {
+ # If the first line has a leading ! and the second does
+ # not, then remove one space to try to align the next
+ # leading characters, which are often the same. For example:
+ # if ( !$ts
+ # || $ts == $self->Holder
+ # || $self->Holder->Type eq "Arena" )
+ #
+ # This usually helps readability, but if there are subsequent
+ # ! operators things will still get messed up. For example:
+ #
+ # if ( !exists $Net::DNS::typesbyname{$qtype}
+ # && exists $Net::DNS::classesbyname{$qtype}
+ # && !exists $Net::DNS::classesbyname{$qclass}
+ # && exists $Net::DNS::typesbyname{$qclass} )
+ # We can't fix that.
+ if ($matches_without_bang) { $pad_spaces-- }
- # look one more token to right..
- # okay if math operator or some termination
- $good_combo =
- ( ( $itokpp == $iend_2 )
- && $is_math_op{ $types_to_go[$itokpp] } )
- || $types_to_go[$itokpp] =~ /^[#,;]$/;
- }
+ # make sure this won't change if -lp is used
+ my $indentation_1 = $leading_spaces_to_go[$ibeg];
+ if ( ref($indentation_1) ) {
+ if ( $indentation_1->get_recoverable_spaces() == 0 ) {
+ my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
+ unless ( $indentation_2->get_recoverable_spaces() == 0 )
+ {
+ $pad_spaces = 0;
}
+ }
+ }
- # check for a number on the left
- if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
+ # we might be able to handle a pad of -1 by removing a blank
+ # token
+ if ( $pad_spaces < 0 ) {
- # okay if nothing else to left
- if ( $itokm == $ibeg_1 ) {
- $good_combo = 1;
- }
+ # Deactivated for -kpit due to conflict. This block deletes
+ # a space in an attempt to improve alignment in some cases,
+ # but it may conflict with user spacing requests. For now
+ # it is just deactivated if the -kpit option is used.
+ if ( $pad_spaces == -1 ) {
+ if ( $ipad > $ibeg
+ && $types_to_go[ $ipad - 1 ] eq 'b'
+ && !%keyword_paren_inner_tightness )
+ {
+ $self->pad_token( $ipad - 1, $pad_spaces );
+ }
+ }
+ $pad_spaces = 0;
+ }
- # otherwise look one more token to left
- else {
+ # now apply any padding for alignment
+ if ( $ipad >= 0 && $pad_spaces ) {
- # okay if math operator, comma, or assignment
- $good_combo = ( $itokmm == $ibeg_1 )
- && ( $is_math_op{ $types_to_go[$itokmm] }
- || $types_to_go[$itokmm] =~ /^[,]$/
- || $is_assignment{ $types_to_go[$itokmm] }
- );
- }
- }
+ my $length_t = total_line_length( $ibeg, $iend );
+ if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) )
+ {
+ $self->pad_token( $ipad, $pad_spaces );
+ }
+ }
+ }
+ }
+ continue {
+ $iendm = $iend;
+ $ibegm = $ibeg;
+ $has_leading_op = $has_leading_op_next;
+ } # end of loop over lines
+ return;
+ }
+} ## end closure set_logical_padding
- # look for a single short token either side of the
- # operator
- if ( !$good_combo ) {
+sub pad_token {
- # Slight adjustment factor to make results
- # independent of break before or after operator in
- # long summed lists. (An operator and a space make
- # two spaces).
- my $two = ( $itok eq $iend_1 ) ? 2 : 0;
+ # insert $pad_spaces before token number $ipad
+ my ( $self, $ipad, $pad_spaces ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $KK = $K_to_go[$ipad];
+ my $tok = $rLL->[$KK]->[_TOKEN_];
+ my $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_];
- $good_combo =
+ if ( $pad_spaces > 0 ) {
+ $tok = ' ' x $pad_spaces . $tok;
+ $tok_len += $pad_spaces;
+ }
+ elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
+ $tok = "";
+ $tok_len = 0;
+ }
+ else {
- # numbers or id's on both sides of this joint
- $types_to_go[$itokp] =~ /^[in]$/
- && $types_to_go[$itokm] =~ /^[in]$/
+ # shouldn't happen
+ return;
+ }
- # one of the two lines must be short:
- && (
- (
- # no more than 2 nonblank tokens right of
- # joint
- $itokpp == $iend_2
+ $tok = $rLL->[$KK]->[_TOKEN_] = $tok;
+ $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
- # short
- && token_sequence_length( $itokp, $iend_2 )
- < $two +
- $rOpts_short_concatenation_item_length
- )
- || (
- # no more than 2 nonblank tokens left of
- # joint
- $itokmm == $ibeg_1
+ $token_lengths_to_go[$ipad] += $pad_spaces;
+ $tokens_to_go[$ipad] = $tok;
- # short
- && token_sequence_length( $ibeg_1, $itokm )
- < 2 - $two +
- $rOpts_short_concatenation_item_length
- )
+ foreach my $i ( $ipad .. $max_index_to_go ) {
+ $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
+ }
+ return;
+}
- )
+sub mate_index_to_go {
+ my ( $self, $i ) = @_;
- # keep pure terms; don't mix +- with */
- && !(
- $is_plus_minus{$type}
- && ( $is_mult_div{ $types_to_go[$itokmm] }
- || $is_mult_div{ $types_to_go[$itokpp] } )
- )
- && !(
- $is_mult_div{$type}
- && ( $is_plus_minus{ $types_to_go[$itokmm] }
- || $is_plus_minus{ $types_to_go[$itokpp] } )
- )
+ # Return the matching index of a container or ternary pair
+ # This is equivalent to the array @mate_index_to_go
+ my $K = $K_to_go[$i];
+ my $K_mate = $self->K_mate_index($K);
+ my $i_mate = -1;
+ if ( defined($K_mate) ) {
+ $i_mate = $i + ( $K_mate - $K );
+ if ( $i_mate < 0 || $i_mate > $max_index_to_go ) {
+ $i_mate = -1;
+ }
+ }
+ my $i_mate_alt = $mate_index_to_go[$i];
- ;
- }
+ # FIXME: Old Debug code which can be removed eventually
+ if ( 0 && $i_mate_alt != $i_mate ) {
+ my $tok = $tokens_to_go[$i];
+ my $type = $types_to_go[$i];
+ my $tok_mate = '*';
+ my $type_mate = '*';
+ if ( $i_mate >= 0 && $i_mate <= $max_index_to_go ) {
+ $tok_mate = $tokens_to_go[$i_mate];
+ $type_mate = $types_to_go[$i_mate];
+ }
+ my $seq = $type_sequence_to_go[$i];
+ my $file = get_input_stream_name();
- # it is also good to combine if we can reduce to 2 lines
- if ( !$good_combo ) {
+ Warn(
+"mate_index: file '$file': i=$i, imate=$i_mate, should be $i_mate_alt, K=$K, K_mate=$K_mate\ntype=$type, tok=$tok, seq=$seq, max=$max_index_to_go, tok_mate=$tok_mate, type_mate=$type_mate"
+ );
+ }
+ return $i_mate;
+}
- # index on other line where same token would be in a
- # long chain.
- my $iother =
- ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
+sub K_mate_index {
- $good_combo =
- $n == 2
- && $n == $nmax
- && $types_to_go[$iother] ne $type;
- }
+ # Given the index K of an opening or closing container, or ?/: ternary pair,
+ # return the index K of the other member of the pair.
+ my ( $self, $K ) = @_;
+ return unless defined($K);
+ my $rLL = $self->[_rLL_];
+ my $seqno = $rLL->[$K]->[_TYPE_SEQUENCE_];
+ return unless ($seqno);
- next unless ($good_combo);
+ my $K_opening = $self->[_K_opening_container_]->{$seqno};
+ if ( defined($K_opening) ) {
+ if ( $K != $K_opening ) { return $K_opening }
+ return $self->[_K_closing_container_]->{$seqno};
+ }
- } ## end math
+ $K_opening = $self->[_K_opening_ternary_]->{$seqno};
+ if ( defined($K_opening) ) {
+ if ( $K != $K_opening ) { return $K_opening }
+ return $self->[_K_closing_ternary_]->{$seqno};
+ }
+ return;
+}
- elsif ( $is_amp_amp{$type} ) {
- ##TBD
- } ## end &&, ||
+{ ## begin closure make_alignment_patterns
- elsif ( $is_assignment{$type} ) {
- ##TBD
- } ## end assignment
- }
+ my %block_type_map;
+ my %keyword_map;
+ my %operator_map;
- #----------------------------------------------------------
- # Recombine Section 1:
- # Join welded nested containers immediately
- #----------------------------------------------------------
- if ( $self->weld_len_right_to_go($iend_1)
- || $self->weld_len_left_to_go($ibeg_2) )
- {
- $n_best = $n;
+ BEGIN {
- # Old coding alternated sweep direction: no longer needed
- # $reverse = 1 - $reverse;
- last;
- }
- $reverse = 0;
+ # map related block names into a common name to
+ # allow alignment
+ %block_type_map = (
+ 'unless' => 'if',
+ 'else' => 'if',
+ 'elsif' => 'if',
+ 'when' => 'if',
+ 'default' => 'if',
+ 'case' => 'if',
+ 'sort' => 'map',
+ 'grep' => 'map',
+ );
- #----------------------------------------------------------
- # Recombine Section 2:
- # Examine token at $iend_1 (right end of first line of pair)
- #----------------------------------------------------------
+ # map certain keywords to the same 'if' class to align
+ # long if/elsif sequences. [elsif.pl]
+ %keyword_map = (
+ 'unless' => 'if',
+ 'else' => 'if',
+ 'elsif' => 'if',
+ 'when' => 'given',
+ 'default' => 'given',
+ 'case' => 'switch',
- # an isolated '}' may join with a ';' terminated segment
- if ( $type_iend_1 eq '}' ) {
+ # treat an 'undef' similar to numbers and quotes
+ 'undef' => 'Q',
+ );
- # Check for cases where combining a semicolon terminated
- # statement with a previous isolated closing paren will
- # allow the combined line to be outdented. This is
- # generally a good move. For example, we can join up
- # the last two lines here:
- # (
- # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
- # $size, $atime, $mtime, $ctime, $blksize, $blocks
- # )
- # = stat($file);
- #
- # to get:
- # (
- # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
- # $size, $atime, $mtime, $ctime, $blksize, $blocks
- # ) = stat($file);
- #
- # which makes the parens line up.
- #
- # Another example, from Joe Matarazzo, probably looks best
- # with the 'or' clause appended to the trailing paren:
- # $self->some_method(
- # PARAM1 => 'foo',
- # PARAM2 => 'bar'
- # ) or die "Some_method didn't work";
- #
- # But we do not want to do this for something like the -lp
- # option where the paren is not outdentable because the
- # trailing clause will be far to the right.
- #
- # The logic here is synchronized with the logic in sub
- # sub set_adjusted_indentation, which actually does
- # the outdenting.
- #
- $skip_Section_3 ||= $this_line_is_semicolon_terminated
+ # map certain operators to the same class for pattern matching
+ %operator_map = (
+ '!~' => '=~',
+ '+=' => '+=',
+ '-=' => '+=',
+ '*=' => '+=',
+ '/=' => '+=',
+ );
+ }
- # only one token on last line
- && $ibeg_1 == $iend_1
+ sub delete_needless_alignments {
+ my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
- # must be structural paren
- && $tokens_to_go[$iend_1] eq ')'
+ # Remove unwanted alignments. This routine is a place to remove
+ # alignments which might cause problems at later stages. There are
+ # currently two types of fixes:
- # style must allow outdenting,
- && !$closing_token_indentation{')'}
+ # 1. Remove excess parens
+ # 2. Remove alignments within 'elsif' conditions
- # only leading '&&', '||', and ':' if no others seen
- # (but note: our count made below could be wrong
- # due to intervening comments)
- && ( $leading_amp_count == 0
- || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
+ # Patch #1: Excess alignment of parens can prevent other good
+ # alignments. For example, note the parens in the first two rows of
+ # the following snippet. They would normally get marked for alignment
+ # and aligned as follows:
- # but leading colons probably line up with a
- # previous colon or question (count could be wrong).
- && $type_ibeg_2 ne ':'
+ # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
+ # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
+ # my $img = new Gimp::Image( $w, $h, RGB );
- # only one step in depth allowed. this line must not
- # begin with a ')' itself.
- && ( $nesting_depth_to_go[$iend_1] ==
- $nesting_depth_to_go[$iend_2] + 1 );
+ # This causes unnecessary paren alignment and prevents the third equals
+ # from aligning. If we remove the unwanted alignments we get:
- # YVES patch 2 of 2:
- # Allow cuddled eval chains, like this:
- # eval {
- # #STUFF;
- # 1; # return true
- # } or do {
- # #handle error
- # };
- # This patch works together with a patch in
- # setting adjusted indentation (where the closing eval
- # brace is outdented if possible).
- # The problem is that an 'eval' block has continuation
- # indentation and it looks better to undo it in some
- # cases. If we do not use this patch we would get:
- # eval {
- # #STUFF;
- # 1; # return true
- # }
- # or do {
- # #handle error
- # };
- # The alternative, for uncuddled style, is to create
- # a patch in set_adjusted_indentation which undoes
- # the indentation of a leading line like 'or do {'.
- # This doesn't work well with -icb through
- if (
- $block_type_to_go[$iend_1] eq 'eval'
- && !$rOpts->{'line-up-parentheses'}
- && !$rOpts->{'indent-closing-brace'}
- && $tokens_to_go[$iend_2] eq '{'
- && (
- ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ )
- || ( $type_ibeg_2 eq 'k'
- && $is_and_or{ $tokens_to_go[$ibeg_2] } )
- || $is_if_unless{ $tokens_to_go[$ibeg_2] }
- )
- )
- {
- $skip_Section_3 ||= 1;
- }
+ # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
+ # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
+ # my $img = new Gimp::Image( $w, $h, RGB );
- next
- unless (
- $skip_Section_3
+ # A rule for doing this which works well is to remove alignment of
+ # parens whose containers do not contain other aligning tokens, with
+ # the exception that we always keep alignment of the first opening
+ # paren on a line (for things like 'if' and 'elsif' statements).
- # handle '.' and '?' specially below
- || ( $type_ibeg_2 =~ /^[\.\?]$/ )
- );
+ # Setup needed constants
+ my $i_good_paren = -1;
+ my $imin_match = $iend + 1;
+ my $i_elsif_close = $ibeg - 1;
+ my $i_elsif_open = $iend + 1;
+ if ( $iend > $ibeg ) {
+ if ( $types_to_go[$ibeg] eq 'k' ) {
+
+ # Paren patch: mark a location of a paren we should keep, such
+ # as one following something like a leading 'if', 'elsif',..
+ $i_good_paren = $ibeg + 1;
+ if ( $types_to_go[$i_good_paren] eq 'b' ) {
+ $i_good_paren++;
}
- elsif ( $type_iend_1 eq '{' ) {
+ # 'elsif' patch: remember the range of the parens of an elsif,
+ # and do not make alignments within them because this can cause
+ # loss of padding and overall brace alignment in the vertical
+ # aligner.
+ if ( $tokens_to_go[$ibeg] eq 'elsif'
+ && $i_good_paren < $iend
+ && $tokens_to_go[$i_good_paren] eq '(' )
+ {
+ $i_elsif_open = $i_good_paren;
+ $i_elsif_close = $self->mate_index_to_go($i_good_paren);
+ }
+ }
+ }
- # YVES
- # honor breaks at opening brace
- # Added to prevent recombining something like this:
- # } || eval { package main;
- next if $forced_breakpoint_to_go[$iend_1];
+ # Loop to make the fixes on this line
+ my @imatch_list;
+ for my $i ( $ibeg .. $iend ) {
+
+ if ( $ralignment_type_to_go->[$i] ) {
+
+ # Patch #2: undo alignment within elsif parens
+ if ( $i > $i_elsif_open && $i < $i_elsif_close ) {
+ $ralignment_type_to_go->[$i] = '';
+ next;
}
+ push @imatch_list, $i;
- # do not recombine lines with ending &&, ||,
- elsif ( $is_amp_amp{$type_iend_1} ) {
- next unless $want_break_before{$type_iend_1};
+ }
+ if ( $tokens_to_go[$i] eq ')' ) {
+
+ # Patch #1: undo the corresponding opening paren if:
+ # - it is at the top of the stack
+ # - and not the first overall opening paren
+ # - does not follow a leading keyword on this line
+ my $imate = $self->mate_index_to_go($i);
+ if ( @imatch_list
+ && $imatch_list[-1] eq $imate
+ && ( $ibeg > 1 || @imatch_list > 1 )
+ && $imate > $i_good_paren )
+ {
+ $ralignment_type_to_go->[$imate] = '';
+ pop @imatch_list;
}
+ }
+ }
+ return;
+ }
- # Identify and recombine a broken ?/: chain
- elsif ( $type_iend_1 eq '?' ) {
+ my $field_length_sum = sub {
+ my ( $i1, $i2 ) = @_;
+ my $len_field = 0;
+ foreach ( $i1 .. $i2 ) {
+ $len_field += $token_lengths_to_go[$_];
+ }
+ return $len_field;
+ };
- # Do not recombine different levels
- next
- if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
+ sub make_alignment_patterns {
- # do not recombine unless next line ends in :
- next unless $type_iend_2 eq ':';
- }
+ # Here we do some important preliminary work for the
+ # vertical aligner. We create three arrays for one
+ # output line. These arrays contain strings that can
+ # be tested by the vertical aligner to see if
+ # consecutive lines can be aligned vertically.
+ #
+ # The three arrays are indexed on the vertical
+ # alignment fields and are:
+ # @tokens - a list of any vertical alignment tokens for this line.
+ # These are tokens, such as '=' '&&' '#' etc which
+ # we want to might align vertically. These are
+ # decorated with various information such as
+ # nesting depth to prevent unwanted vertical
+ # alignment matches.
+ # @fields - the actual text of the line between the vertical alignment
+ # tokens.
+ # @patterns - a modified list of token types, one for each alignment
+ # field. These should normally each match before alignment is
+ # allowed, even when the alignment tokens match.
+ my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
+ my @tokens = ();
+ my @fields = ();
+ my @patterns = ();
+ my @field_lengths = ();
+ my $i_start = $ibeg;
- # for lines ending in a comma...
- elsif ( $type_iend_1 eq ',' ) {
+ my $depth = 0;
+ my %container_name = ( 0 => "" );
- # Do not recombine at comma which is following the
- # input bias.
- # TODO: might be best to make a special flag
- next if ( $old_breakpoint_to_go[$iend_1] );
+ my $j = 0; # field index
- # an isolated '},' may join with an identifier + ';'
- # this is useful for the class of a 'bless' statement (bless.t)
- if ( $type_ibeg_1 eq '}'
- && $type_ibeg_2 eq 'i' )
- {
- next
- unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
- && ( $iend_2 == ( $ibeg_2 + 1 ) )
- && $this_line_is_semicolon_terminated );
+ $patterns[0] = "";
+ my %token_count;
+ for my $i ( $ibeg .. $iend ) {
- # override breakpoint
- $forced_breakpoint_to_go[$iend_1] = 0;
+ # Keep track of containers balanced on this line only.
+ # These are used below to prevent unwanted cross-line alignments.
+ # Unbalanced containers already avoid aligning across
+ # container boundaries.
+ my $tok = $tokens_to_go[$i];
+ my $depth_last = $depth;
+ if ( $tok =~ /^[\(\{\[]/ ) { #'(' ) {
+
+ # if container is balanced on this line...
+ my $i_mate = $self->mate_index_to_go($i);
+ if ( $i_mate > $i && $i_mate <= $iend ) {
+ $depth++;
+
+ # Append the previous token name to make the container name
+ # more unique. This name will also be given to any commas
+ # within this container, and it helps avoid undesirable
+ # alignments of different types of containers.
+
+ # Containers beginning with { and [ are given those names
+ # for uniqueness. That way commas in different containers
+ # will not match. Here is an example of what this prevents:
+ # a => [ 1, 2, 3 ],
+ # b => { b1 => 4, b2 => 5 },
+ # Here is another example of what we avoid by labeling the
+ # commas properly:
+
+ # is_d( [ $a, $a ], [ $b, $c ] );
+ # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
+ # is_d( [ \$a, \$a ], [ \$b, \$c ] );
+
+ my $name = $tok;
+ if ( $tok eq '(' ) {
+ $name = $self->make_paren_name($i);
}
+ $container_name{$depth} = "+" . $name;
- # but otherwise ..
- else {
+ # Make the container name even more unique if necessary.
+ # If we are not vertically aligning this opening paren,
+ # append a character count to avoid bad alignment because
+ # it usually looks bad to align commas within containers
+ # for which the opening parens do not align. Here
+ # is an example very BAD alignment of commas (because
+ # the atan2 functions are not all aligned):
+ # $XY =
+ # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
+ # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
+ # $X * atan2( $X, 1 ) -
+ # $Y * atan2( $Y, 1 );
+ #
+ # On the other hand, it is usually okay to align commas if
+ # opening parens align, such as:
+ # glVertex3d( $cx + $s * $xs, $cy, $z );
+ # glVertex3d( $cx, $cy + $s * $ys, $z );
+ # glVertex3d( $cx - $s * $xs, $cy, $z );
+ # glVertex3d( $cx, $cy - $s * $ys, $z );
+ #
+ # To distinguish between these situations, we will
+ # append the length of the line from the previous matching
+ # token, or beginning of line, to the function name. This
+ # will allow the vertical aligner to reject undesirable
+ # matches.
- # do not recombine after a comma unless this will leave
- # just 1 more line
- next unless ( $n + 1 >= $nmax );
+ # if we are not aligning on this paren...
+ if ( !$ralignment_type_to_go->[$i] ) {
- # do not recombine if there is a change in indentation depth
- next
- if (
- $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
+ # Sum length from previous alignment
+ my $len = token_sequence_length( $i_start, $i - 1 );
- # do not recombine a "complex expression" after a
- # comma. "complex" means no parens.
- my $saw_paren;
- foreach my $ii ( $ibeg_2 .. $iend_2 ) {
- if ( $tokens_to_go[$ii] eq '(' ) {
- $saw_paren = 1;
- last;
- }
+ # Minor patch: do not include the length of any '!'.
+ # Otherwise, commas in the following line will not
+ # match
+ # ok( 20, tapprox( ( pdl 2, 3 ), ( pdl 2, 3 ) ) );
+ # ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) );
+ if ( grep { $_ eq '!' }
+ @types_to_go[ $i_start .. $i - 1 ] )
+ {
+ $len -= 1;
}
- next if $saw_paren;
- }
- }
- # opening paren..
- elsif ( $type_iend_1 eq '(' ) {
+ if ( $i_start == $ibeg ) {
- # No longer doing this
+ # For first token, use distance from start of line
+ # but subtract off the indentation due to level.
+ # Otherwise, results could vary with indentation.
+ $len += leading_spaces_to_go($ibeg) -
+ $levels_to_go[$i_start] * $rOpts_indent_columns;
+ if ( $len < 0 ) { $len = 0 }
+ }
+
+ # tack this length onto the container name to try
+ # to make a unique token name
+ $container_name{$depth} .= "-" . $len;
+ }
}
+ }
+ elsif ( $tokens_to_go[$i] =~ /^[\)\}\]]/ ) {
+ $depth-- if $depth > 0;
+ }
+
+ # if we find a new synchronization token, we are done with
+ # a field
+ if ( $i > $i_start && $ralignment_type_to_go->[$i] ) {
- elsif ( $type_iend_1 eq ')' ) {
+ my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
- # No longer doing this
- }
+ # map similar items
+ my $tok_map = $operator_map{$tok};
+ $tok = $tok_map if ($tok_map);
- # keep a terminal for-semicolon
- elsif ( $type_iend_1 eq 'f' ) {
- next;
+ # make separators in different nesting depths unique
+ # by appending the nesting depth digit.
+ if ( $raw_tok ne '#' ) {
+ $tok .= "$nesting_depth_to_go[$i]";
}
- # if '=' at end of line ...
- elsif ( $is_assignment{$type_iend_1} ) {
-
- # keep break after = if it was in input stream
- # this helps prevent 'blinkers'
- next if $old_breakpoint_to_go[$iend_1]
-
- # don't strand an isolated '='
- && $iend_1 != $ibeg_1;
+ # also decorate commas with any container name to avoid
+ # unwanted cross-line alignments.
+ if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
- my $is_short_quote =
- ( $type_ibeg_2 eq 'Q'
- && $ibeg_2 == $iend_2
- && token_sequence_length( $ibeg_2, $ibeg_2 ) <
- $rOpts_short_concatenation_item_length );
- my $is_ternary =
- ( $type_ibeg_1 eq '?'
- && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
+ # If we are at an opening token which increased depth, we have
+ # to use the name from the previous depth.
+ my $depth_p =
+ ( $depth_last < $depth ? $depth_last : $depth );
+ if ( $container_name{$depth_p} ) {
+ $tok .= $container_name{$depth_p};
+ }
+ }
- # always join an isolated '=', a short quote, or if this
- # will put ?/: at start of adjacent lines
- if ( $ibeg_1 != $iend_1
- && !$is_short_quote
- && !$is_ternary )
+ # Patch to avoid aligning leading and trailing if, unless.
+ # Mark trailing if, unless statements with container names.
+ # This makes them different from leading if, unless which
+ # are not so marked at present. If we ever need to name
+ # them too, we could use ci to distinguish them.
+ # Example problem to avoid:
+ # return ( 2, "DBERROR" )
+ # if ( $retval == 2 );
+ # if ( scalar @_ ) {
+ # my ( $a, $b, $c, $d, $e, $f ) = @_;
+ # }
+ if ( $raw_tok eq '(' ) {
+ my $ci = $ci_levels_to_go[$ibeg];
+ if ( $container_name{$depth} =~ /^\+(if|unless)/
+ && $ci )
{
- next
- unless (
- (
+ $tok .= $container_name{$depth};
+ }
+ }
- # unless we can reduce this to two lines
- $nmax < $n + 2
+ # Decorate block braces with block types to avoid
+ # unwanted alignments such as the following:
+ # foreach ( @{$routput_array} ) { $fh->print($_) }
+ # eval { $fh->close() };
+ if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
+ my $block_type = $block_type_to_go[$i];
- # or three lines, the last with a leading semicolon
- || ( $nmax == $n + 2
- && $types_to_go[$ibeg_nmax] eq ';' )
+ # map certain related block types to allow
+ # else blocks to align
+ $block_type = $block_type_map{$block_type}
+ if ( defined( $block_type_map{$block_type} ) );
- # or the next line ends with a here doc
- || $type_iend_2 eq 'h'
+ # remove sub names to allow one-line sub braces to align
+ # regardless of name
+ #if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
+ if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
- # or the next line ends in an open paren or brace
- # and the break hasn't been forced [dima.t]
- || ( !$forced_breakpoint_to_go[$iend_1]
- && $type_iend_2 eq '{' )
- )
+ # allow all control-type blocks to align
+ if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
- # do not recombine if the two lines might align well
- # this is a very approximate test for this
- && (
+ $tok .= $block_type;
+ }
- # RT#127633 - the leading tokens are not operators
- ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
+ # Mark multiple copies of certain tokens with the copy number
+ # This will allow the aligner to decide if they are matched.
+ # For now, only do this for equals. For example, the two
+ # equals on the next line will be labeled '=0' and '=0.2'.
+ # Later, the '=0.2' will be ignored in alignment because it
+ # has no match.
- # or they are different
- || ( $ibeg_3 >= 0
- && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
- )
- );
+ # $| = $debug = 1 if $opt_d;
+ # $full_index = 1 if $opt_i;
- if (
+ if ( $raw_tok eq '=' || $raw_tok eq '=>' ) {
+ $token_count{$tok}++;
+ if ( $token_count{$tok} > 1 ) {
+ $tok .= '.' . $token_count{$tok};
+ }
+ }
- # Recombine if we can make two lines
- $nmax >= $n + 2
+ # concatenate the text of the consecutive tokens to form
+ # the field
+ push( @fields,
+ join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
- # -lp users often prefer this:
- # my $title = function($env, $env, $sysarea,
- # "bubba Borrower Entry");
- # so we will recombine if -lp is used we have
- # ending comma
- && ( !$rOpts_line_up_parentheses
- || $type_iend_2 ne ',' )
- )
- {
+ push @field_lengths, $field_length_sum->( $i_start, $i - 1 );
- # otherwise, scan the rhs line up to last token for
- # complexity. Note that we are not counting the last
- # token in case it is an opening paren.
- my $tv = 0;
- my $depth = $nesting_depth_to_go[$ibeg_2];
- foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
- if ( $nesting_depth_to_go[$i] != $depth ) {
- $tv++;
- last if ( $tv > 1 );
- }
- $depth = $nesting_depth_to_go[$i];
- }
+ # store the alignment token for this field
+ push( @tokens, $tok );
- # ok to recombine if no level changes before last token
- if ( $tv > 0 ) {
+ # get ready for the next batch
+ $i_start = $i;
+ $j++;
+ $patterns[$j] = "";
+ }
- # otherwise, do not recombine if more than two
- # level changes.
- next if ( $tv > 1 );
+ # continue accumulating tokens
+ # handle non-keywords..
+ if ( $types_to_go[$i] ne 'k' ) {
+ my $type = $types_to_go[$i];
- # check total complexity of the two adjacent lines
- # that will occur if we do this join
- my $istop =
- ( $n < $nmax )
- ? $ri_end->[ $n + 1 ]
- : $iend_2;
- foreach my $i ( $iend_2 .. $istop ) {
- if ( $nesting_depth_to_go[$i] != $depth ) {
- $tv++;
- last if ( $tv > 2 );
- }
- $depth = $nesting_depth_to_go[$i];
- }
+ # Mark most things before arrows as a quote to
+ # get them to line up. Testfile: mixed.pl.
+ if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
+ my $next_type = $types_to_go[ $i + 1 ];
+ my $i_next_nonblank =
+ ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
- # do not recombine if total is more than 2 level changes
- next if ( $tv > 2 );
- }
- }
- }
+ if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
+ $type = 'Q';
- unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
- $forced_breakpoint_to_go[$iend_1] = 0;
+ # Patch to ignore leading minus before words,
+ # by changing pattern 'mQ' into just 'Q',
+ # so that we can align things like this:
+ # Button => "Print letter \"~$_\"",
+ # -command => [ sub { print "$_[0]\n" }, $_ ],
+ if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
}
}
- # for keywords..
- elsif ( $type_iend_1 eq 'k' ) {
-
- # make major control keywords stand out
- # (recombine.t)
- next
- if (
-
- #/^(last|next|redo|return)$/
- $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
-
- # but only if followed by multiple lines
- && $n < $nmax
- );
-
- if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
- next
- unless $want_break_before{ $tokens_to_go[$iend_1] };
- }
+ # Convert a bareword within braces into a quote for matching.
+ # This will allow alignment of expressions like this:
+ # local ( $SIG{'INT'} ) = IGNORE;
+ # local ( $SIG{ALRM} ) = 'POSTMAN';
+ if ( $type eq 'w'
+ && $i > $ibeg
+ && $i < $iend
+ && $types_to_go[ $i - 1 ] eq 'L'
+ && $types_to_go[ $i + 1 ] eq 'R' )
+ {
+ $type = 'Q';
}
- #----------------------------------------------------------
- # Recombine Section 3:
- # Examine token at $ibeg_2 (left end of second line of pair)
- #----------------------------------------------------------
+ # patch to make numbers and quotes align
+ if ( $type eq 'n' ) { $type = 'Q' }
- # join lines identified above as capable of
- # causing an outdented line with leading closing paren
- # Note that we are skipping the rest of this section
- # and the rest of the loop to do the join
- if ($skip_Section_3) {
- $forced_breakpoint_to_go[$iend_1] = 0;
- $n_best = $n;
- last;
- }
+ # patch to ignore any ! in patterns
+ if ( $type eq '!' ) { $type = '' }
- # handle lines with leading &&, ||
- elsif ( $is_amp_amp{$type_ibeg_2} ) {
+ $patterns[$j] .= $type;
+ }
- $leading_amp_count++;
+ # for keywords we have to use the actual text
+ else {
- # ok to recombine if it follows a ? or :
- # and is followed by an open paren..
- my $ok =
- ( $is_ternary{$type_ibeg_1}
- && $tokens_to_go[$iend_2] eq '(' )
+ my $tok = $tokens_to_go[$i];
- # or is followed by a ? or : at same depth
- #
- # We are looking for something like this. We can
- # recombine the && line with the line above to make the
- # structure more clear:
- # return
- # exists $G->{Attr}->{V}
- # && exists $G->{Attr}->{V}->{$u}
- # ? %{ $G->{Attr}->{V}->{$u} }
- # : ();
- #
- # We should probably leave something like this alone:
- # return
- # exists $G->{Attr}->{E}
- # && exists $G->{Attr}->{E}->{$u}
- # && exists $G->{Attr}->{E}->{$u}->{$v}
- # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
- # : ();
- # so that we either have all of the &&'s (or ||'s)
- # on one line, as in the first example, or break at
- # each one as in the second example. However, it
- # sometimes makes things worse to check for this because
- # it prevents multiple recombinations. So this is not done.
- || ( $ibeg_3 >= 0
- && $is_ternary{ $types_to_go[$ibeg_3] }
- && $nesting_depth_to_go[$ibeg_3] ==
- $nesting_depth_to_go[$ibeg_2] );
+ # but map certain keywords to a common string to allow
+ # alignment.
+ $tok = $keyword_map{$tok}
+ if ( defined( $keyword_map{$tok} ) );
+ $patterns[$j] .= $tok;
+ }
+ }
- next if !$ok && $want_break_before{$type_ibeg_2};
- $forced_breakpoint_to_go[$iend_1] = 0;
+ # done with this line .. join text of tokens to make the last field
+ push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
+ push @field_lengths, $field_length_sum->( $i_start, $iend );
+ return ( \@tokens, \@fields, \@patterns, \@field_lengths );
+ }
- # tweak the bond strength to give this joint priority
- # over ? and :
- $bs_tweak = 0.25;
- }
+} ## end closure make_alignment_patterns
- # Identify and recombine a broken ?/: chain
- elsif ( $type_ibeg_2 eq '?' ) {
+sub make_paren_name {
+ my ( $self, $i ) = @_;
- # Do not recombine different levels
- my $lev = $levels_to_go[$ibeg_2];
- next if ( $lev ne $levels_to_go[$ibeg_1] );
+ # The token at index $i is a '('.
+ # Create an alignment name for it to avoid incorrect alignments.
- # Do not recombine a '?' if either next line or
- # previous line does not start with a ':'. The reasons
- # are that (1) no alignment of the ? will be possible
- # and (2) the expression is somewhat complex, so the
- # '?' is harder to see in the interior of the line.
- my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
- my $precedes_colon =
- $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
- next unless ( $follows_colon || $precedes_colon );
+ # Start with the name of the previous nonblank token...
+ my $name = "";
+ my $im = $i - 1;
+ return "" if ( $im < 0 );
+ if ( $types_to_go[$im] eq 'b' ) { $im--; }
+ return "" if ( $im < 0 );
+ $name = $tokens_to_go[$im];
- # we will always combining a ? line following a : line
- if ( !$follows_colon ) {
+ # Prepend any sub name to an isolated -> to avoid unwanted alignments
+ # [test case is test8/penco.pl]
+ if ( $name eq '->' ) {
+ $im--;
+ if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
+ $name = $tokens_to_go[$im] . $name;
+ }
+ }
- # ...otherwise recombine only if it looks like a chain.
- # we will just look at a few nearby lines to see if
- # this looks like a chain.
- my $local_count = 0;
- foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
- $local_count++
- if $ii >= 0
- && $types_to_go[$ii] eq ':'
- && $levels_to_go[$ii] == $lev;
- }
- next unless ( $local_count > 1 );
- }
- $forced_breakpoint_to_go[$iend_1] = 0;
- }
+ # Finally, remove any leading arrows
+ $name =~ s/^->//;
+ return $name;
+}
- # do not recombine lines with leading '.'
- elsif ( $type_ibeg_2 eq '.' ) {
- my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
- next
- unless (
+{ ## begin closure set_adjusted_indentation
- # ... unless there is just one and we can reduce
- # this to two lines if we do. For example, this
- #
- #
- # $bodyA .=
- # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
- #
- # looks better than this:
- # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
- # . '$args .= $pat;'
+ my ( $last_indentation_written, $last_unadjusted_indentation,
+ $last_leading_token );
- (
- $n == 2
- && $n == $nmax
- && $type_ibeg_1 ne $type_ibeg_2
- )
+ sub initialize_adjusted_indentation {
+ $last_indentation_written = 0;
+ $last_unadjusted_indentation = 0;
+ $last_leading_token = "";
+ return;
+ }
- # ... or this would strand a short quote , like this
- # . "some long quote"
- # . "\n";
+ sub set_adjusted_indentation {
- || ( $types_to_go[$i_next_nonblank] eq 'Q'
- && $i_next_nonblank >= $iend_2 - 1
- && $token_lengths_to_go[$i_next_nonblank] <
- $rOpts_short_concatenation_item_length )
- );
- }
+ # This routine has the final say regarding the actual indentation of
+ # a line. It starts with the basic indentation which has been
+ # defined for the leading token, and then takes into account any
+ # options that the user has set regarding special indenting and
+ # outdenting.
- # handle leading keyword..
- elsif ( $type_ibeg_2 eq 'k' ) {
+ my (
+ $self, $ibeg,
+ $iend, $rfields,
+ $rpatterns, $ri_first,
+ $ri_last, $rindentation_list,
+ $level_jump, $starting_in_quote,
+ $is_static_block_comment,
+ ) = @_;
- # handle leading "or"
- if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
- next
- unless (
- $this_line_is_semicolon_terminated
- && (
- $type_ibeg_1 eq '}'
- || (
+ my $rLL = $self->[_rLL_];
- # following 'if' or 'unless' or 'or'
- $type_ibeg_1 eq 'k'
- && $is_if_unless{ $tokens_to_go[$ibeg_1] }
+ # we need to know the last token of this line
+ my ( $terminal_type, $i_terminal ) =
+ $self->terminal_type_i( $ibeg, $iend );
- # important: only combine a very simple or
- # statement because the step below may have
- # combined a trailing 'and' with this or,
- # and we do not want to then combine
- # everything together
- && ( $iend_2 - $ibeg_2 <= 7 )
- )
- )
- );
+ my $terminal_block_type = $block_type_to_go[$i_terminal];
+ my $is_outdented_line = 0;
- #X: RT #81854
- $forced_breakpoint_to_go[$iend_1] = 0
- unless $old_breakpoint_to_go[$iend_1];
- }
+ my $is_semicolon_terminated = $terminal_type eq ';'
+ && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
- # handle leading 'and' and 'xor'
- elsif ($tokens_to_go[$ibeg_2] eq 'and'
- || $tokens_to_go[$ibeg_2] eq 'xor' )
- {
+ # NOTE: A future improvement would be to make it semicolon terminated
+ # even if it does not have a semicolon but is followed by a closing
+ # block brace. This would undo ci even for something like the
+ # following, in which the final paren does not have a semicolon because
+ # it is a possible weld location:
- # Decide if we will combine a single terminal 'and'
- # after an 'if' or 'unless'.
+ # if ($BOLD_MATH) {
+ # (
+ # $labels, $comment,
+ # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
+ # )
+ # }
+ #
- # This looks best with the 'and' on the same
- # line as the 'if':
- #
- # $a = 1
- # if $seconds and $nu < 2;
- #
- # But this looks better as shown:
- #
- # $a = 1
- # if !$this->{Parents}{$_}
- # or $this->{Parents}{$_} eq $_;
- #
- next
- unless (
- $this_line_is_semicolon_terminated
- && (
+ # MOJO: Set a flag if this lines begins with ')->'
+ my $leading_paren_arrow = (
+ $types_to_go[$ibeg] eq '}'
+ && $tokens_to_go[$ibeg] eq ')'
+ && (
+ ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
+ || ( $ibeg < $i_terminal - 1
+ && $types_to_go[ $ibeg + 1 ] eq 'b'
+ && $types_to_go[ $ibeg + 2 ] eq '->' )
+ )
+ );
- # following 'if' or 'unless' or 'or'
- $type_ibeg_1 eq 'k'
- && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
- || $tokens_to_go[$ibeg_1] eq 'or' )
- )
- );
- }
+ ##########################################################
+ # Section 1: set a flag and a default indentation
+ #
+ # Most lines are indented according to the initial token.
+ # But it is common to outdent to the level just after the
+ # terminal token in certain cases...
+ # adjust_indentation flag:
+ # 0 - do not adjust
+ # 1 - outdent
+ # 2 - vertically align with opening token
+ # 3 - indent
+ ##########################################################
+ my $adjust_indentation = 0;
+ my $default_adjust_indentation = $adjust_indentation;
- # handle leading "if" and "unless"
- elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
+ my (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ );
- # FIXME: This is still experimental..may not be too useful
- next
- unless (
- $this_line_is_semicolon_terminated
+ my $type_beg = $types_to_go[$ibeg];
+ my $token_beg = $tokens_to_go[$ibeg];
+ my $K_beg = $K_to_go[$ibeg];
+ my $ibeg_weld_fix = $ibeg;
- # previous line begins with 'and' or 'or'
- && $type_ibeg_1 eq 'k'
- && $is_and_or{ $tokens_to_go[$ibeg_1] }
+ # QW PATCH 2 (Testing)
+ # At an isolated closing token of a qw quote which is welded to
+ # a following closing token, we will locally change its type to
+ # be the same as its token. This will allow formatting to be the
+ # same as for an ordinary closing token.
- );
+ # For -lp formatting se use $ibeg_weld_fix to get around the problem
+ # that with -lp type formatting the opening and closing tokens to not
+ # have sequence numbers.
+ if ( $type_beg eq 'q' && $token_beg =~ /^[\)\}\]\>]/ ) {
+ my $K_next_nonblank = $self->K_next_code($K_beg);
+ if ( defined($K_next_nonblank) ) {
+ my $type_sequence = $rLL->[$K_next_nonblank]->[_TYPE_SEQUENCE_];
+ my $token = $rLL->[$K_next_nonblank]->[_TOKEN_];
+ my $welded = $self->weld_len_left( $type_sequence, $token );
+ if ($welded) {
+ my $itest = $ibeg + ( $K_next_nonblank - $K_beg );
+ if ( $itest <= $max_index_to_go ) {
+ $ibeg_weld_fix = $itest;
}
+ $type_beg = ')'; ##$token_beg;
+ }
+ }
+ }
- # handle all other leading keywords
- else {
+ # if we are at a closing token of some type..
+ if ( $type_beg =~ /^[\)\}\]R]$/ ) {
- # keywords look best at start of lines,
- # but combine things like "1 while"
- unless ( $is_assignment{$type_iend_1} ) {
- next
- if ( ( $type_iend_1 ne 'k' )
- && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
- }
- }
- }
+ # get the indentation of the line containing the corresponding
+ # opening token
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
+ $ri_last, $rindentation_list );
- # similar treatment of && and || as above for 'and' and 'or':
- # NOTE: This block of code is currently bypassed because
- # of a previous block but is retained for possible future use.
- elsif ( $is_amp_amp{$type_ibeg_2} ) {
+ # First set the default behavior:
+ if (
- # maybe looking at something like:
- # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
+ # default behavior is to outdent closing lines
+ # of the form: "); }; ]; )->xxx;"
+ $is_semicolon_terminated
- next
- unless (
- $this_line_is_semicolon_terminated
+ # and 'cuddled parens' of the form: ")->pack("
+ # Bug fix for RT #123749]: the types here were
+ # incorrectly '(' and ')'. Corrected to be '{' and '}'
+ || (
+ $terminal_type eq '{'
+ && $type_beg eq '}'
+ && ( $nesting_depth_to_go[$iend] + 1 ==
+ $nesting_depth_to_go[$ibeg] )
+ )
- # previous line begins with an 'if' or 'unless' keyword
- && $type_ibeg_1 eq 'k'
- && $is_if_unless{ $tokens_to_go[$ibeg_1] }
+ # remove continuation indentation for any line like
+ # } ... {
+ # or without ending '{' and unbalanced, such as
+ # such as '}->{$operator}'
+ || (
+ $type_beg eq '}'
- );
- }
+ && ( $types_to_go[$iend] eq '{'
+ || $levels_to_go[$iend] < $levels_to_go[$ibeg] )
+ )
- # handle line with leading = or similar
- elsif ( $is_assignment{$type_ibeg_2} ) {
- next unless ( $n == 1 || $n == $nmax );
- next if $old_breakpoint_to_go[$iend_1];
- next
- unless (
+ # and when the next line is at a lower indentation level
+ # PATCH: and only if the style allows undoing continuation
+ # for all closing token types. We should really wait until
+ # the indentation of the next line is known and then make
+ # a decision, but that would require another pass.
+ || ( $level_jump < 0 && !$some_closing_token_indentation )
- # unless we can reduce this to two lines
- $nmax == 2
+ # Patch for -wn=2, multiple welded closing tokens
+ || ( $i_terminal > $ibeg
+ && $types_to_go[$iend] =~ /^[\)\}\]R]$/ )
- # or three lines, the last with a leading semicolon
- || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
+ )
+ {
+ $adjust_indentation = 1;
+ }
- # or the next line ends with a here doc
- || $type_iend_2 eq 'h'
+ # outdent something like '),'
+ if (
+ $terminal_type eq ','
- # or this is a short line ending in ;
- || ( $n == $nmax && $this_line_is_semicolon_terminated )
- );
- $forced_breakpoint_to_go[$iend_1] = 0;
- }
+ # Removed this constraint for -wn
+ # OLD: allow just one character before the comma
+ # && $i_terminal == $ibeg + 1
- #----------------------------------------------------------
- # Recombine Section 4:
- # Combine the lines if we arrive here and it is possible
- #----------------------------------------------------------
+ # require LIST environment; otherwise, we may outdent too much -
+ # this can happen in calls without parentheses (overload.t);
+ && $container_environment_to_go[$i_terminal] eq 'LIST'
+ )
+ {
+ $adjust_indentation = 1;
+ }
- # honor hard breakpoints
- next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
+ # undo continuation indentation of a terminal closing token if
+ # it is the last token before a level decrease. This will allow
+ # a closing token to line up with its opening counterpart, and
+ # avoids an indentation jump larger than 1 level.
+ if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
+ && $i_terminal == $ibeg
+ && defined($K_beg) )
+ {
+ my $K_next_nonblank = $self->K_next_code($K_beg);
- my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
+ # Patch for RT#131115: honor -bli flag at closing brace
+ my $is_bli =
+ $rOpts->{'brace-left-and-indent'}
+ && $block_type_to_go[$i_terminal]
+ && $block_type_to_go[$i_terminal] =~ /$bli_pattern/;
- # Require a few extra spaces before recombining lines if we are
- # at an old breakpoint unless this is a simple list or terminal
- # line. The goal is to avoid oscillating between two
- # quasi-stable end states. For example this snippet caused
- # problems:
-## my $this =
-## bless {
-## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
-## },
-## $type;
- next
- if ( $old_breakpoint_to_go[$iend_1]
- && !$this_line_is_semicolon_terminated
- && $n < $nmax
- && $excess + 4 > 0
- && $type_iend_2 ne ',' );
+ if ( !$is_bli && defined($K_next_nonblank) ) {
+ my $lev = $rLL->[$K_beg]->[_LEVEL_];
+ my $level_next = $rLL->[$K_next_nonblank]->[_LEVEL_];
+ $adjust_indentation = 1 if ( $level_next < $lev );
+ }
- # do not recombine if we would skip in indentation levels
- if ( $n < $nmax ) {
- my $if_next = $ri_beg->[ $n + 1 ];
- next
- if (
- $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
- && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
+ # Patch for RT #96101, in which closing brace of anonymous subs
+ # was not outdented. We should look ahead and see if there is
+ # a level decrease at the next token (i.e., a closing token),
+ # but right now we do not have that information. For now
+ # we see if we are in a list, and this works well.
+ # See test files 'sub*.t' for good test cases.
+ if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/
+ && $container_environment_to_go[$i_terminal] eq 'LIST'
+ && !$rOpts->{'indent-closing-brace'} )
+ {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = $self->get_opening_indentation( $ibeg, $ri_first,
+ $ri_last, $rindentation_list );
+ my $indentation = $leading_spaces_to_go[$ibeg];
+ if ( defined($opening_indentation)
+ && get_spaces($indentation) >
+ get_spaces($opening_indentation) )
+ {
+ $adjust_indentation = 1;
+ }
+ }
+ }
- # but an isolated 'if (' is undesirable
- && !(
- $n == 1
- && $iend_1 - $ibeg_1 <= 2
- && $type_ibeg_1 eq 'k'
- && $tokens_to_go[$ibeg_1] eq 'if'
- && $tokens_to_go[$iend_1] ne '('
- )
- );
+ # YVES patch 1 of 2:
+ # Undo ci of line with leading closing eval brace,
+ # but not beyond the indention of the line with
+ # the opening brace.
+ if ( $block_type_to_go[$ibeg] eq 'eval'
+ && !$rOpts->{'line-up-parentheses'}
+ && !$rOpts->{'indent-closing-brace'} )
+ {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ $rindentation_list );
+ my $indentation = $leading_spaces_to_go[$ibeg];
+ if ( defined($opening_indentation)
+ && get_spaces($indentation) >
+ get_spaces($opening_indentation) )
+ {
+ $adjust_indentation = 1;
}
+ }
+
+ $default_adjust_indentation = $adjust_indentation;
+
+ # Now modify default behavior according to user request:
+ # handle option to indent non-blocks of the form ); }; ];
+ # But don't do special indentation to something like ')->pack('
+ if ( !$block_type_to_go[$ibeg] ) {
+
+ # Note that logical padding has already been applied, so we may
+ # need to remove some spaces to get a valid hash key.
+ my $tok = $tokens_to_go[$ibeg];
+ my $cti = $closing_token_indentation{$tok};
+ if ( !defined($cti) ) {
- # honor no-break's
- next if ( $bs >= NO_BREAK - 1 );
+ # $cti may not be defined for several reasons.
+ # -padding may have been applied so the character
+ # has a length > 1
+ # - we may have welded to a closing quote token.
+ # Here is an example (perltidy -wn):
+ # __PACKAGE__->load_components( qw(
+ # > Core
+ # >
+ # > ) );
+ $adjust_indentation = 0;
- # remember the pair with the greatest bond strength
- if ( !$n_best ) {
- $n_best = $n;
- $bs_best = $bs;
}
- else {
-
- if ( $bs > $bs_best ) {
- $n_best = $n;
- $bs_best = $bs;
+ elsif ( $cti == 1 ) {
+ if ( $i_terminal <= $ibeg + 1
+ || $is_semicolon_terminated )
+ {
+ $adjust_indentation = 2;
+ }
+ else {
+ $adjust_indentation = 0;
}
}
+ elsif ( $cti == 2 ) {
+ if ($is_semicolon_terminated) {
+ $adjust_indentation = 3;
+ }
+ else {
+ $adjust_indentation = 0;
+ }
+ }
+ elsif ( $cti == 3 ) {
+ $adjust_indentation = 3;
+ }
}
- # recombine the pair with the greatest bond strength
- if ($n_best) {
- splice @{$ri_beg}, $n_best, 1;
- splice @{$ri_end}, $n_best - 1, 1;
- splice @joint, $n_best, 1;
+ # handle option to indent blocks
+ else {
+ if (
+ $rOpts->{'indent-closing-brace'}
+ && (
+ $i_terminal == $ibeg # isolated terminal '}'
+ || $is_semicolon_terminated
+ )
+ ) # } xxxx ;
+ {
+ $adjust_indentation = 3;
+ }
+ }
+ }
- # keep going if we are still making progress
- $more_to_do++;
+ # if at ');', '};', '>;', and '];' of a terminal qw quote
+ elsif ($rpatterns->[0] =~ /^qb*;$/
+ && $rfields->[0] =~ /^([\)\}\]\>]);$/ )
+ {
+ if ( $closing_token_indentation{$1} == 0 ) {
+ $adjust_indentation = 1;
+ }
+ else {
+ $adjust_indentation = 3;
}
}
- return ( $ri_beg, $ri_end );
- }
-} ## end closure recombine_breakpoints
-sub break_all_chain_tokens {
+ # if line begins with a ':', align it with any
+ # previous line leading with corresponding ?
+ elsif ( $types_to_go[$ibeg] eq ':' ) {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ $rindentation_list );
+ if ($is_leading) { $adjust_indentation = 2; }
+ }
- # scan the current breakpoints looking for breaks at certain "chain
- # operators" (. : && || + etc) which often occur repeatedly in a long
- # statement. If we see a break at any one, break at all similar tokens
- # within the same container.
- #
- my ( $self, $ri_left, $ri_right ) = @_;
+ ##########################################################
+ # Section 2: set indentation according to flag set above
+ #
+ # Select the indentation object to define leading
+ # whitespace. If we are outdenting something like '} } );'
+ # then we want to use one level below the last token
+ # ($i_terminal) in order to get it to fully outdent through
+ # all levels.
+ ##########################################################
+ my $indentation;
+ my $lev;
+ my $level_end = $levels_to_go[$iend];
- my %saw_chain_type;
- my %left_chain_type;
- my %right_chain_type;
- my %interior_chain_type;
- my $nmax = @{$ri_right} - 1;
+ if ( $adjust_indentation == 0 ) {
+ $indentation = $leading_spaces_to_go[$ibeg];
+ $lev = $levels_to_go[$ibeg];
+ }
+ elsif ( $adjust_indentation == 1 ) {
- # scan the left and right end tokens of all lines
- my $count = 0;
- for my $n ( 0 .. $nmax ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- my $typel = $types_to_go[$il];
- my $typer = $types_to_go[$ir];
- $typel = '+' if ( $typel eq '-' ); # treat + and - the same
- $typer = '+' if ( $typer eq '-' );
- $typel = '*' if ( $typel eq '/' ); # treat * and / the same
- $typer = '*' if ( $typer eq '/' );
- my $tokenl = $tokens_to_go[$il];
- my $tokenr = $tokens_to_go[$ir];
+ # Change the indentation to be that of a different token on the line
+ # Previously, the indentation of the terminal token was used:
+ # OLD CODING:
+ # $indentation = $reduced_spaces_to_go[$i_terminal];
+ # $lev = $levels_to_go[$i_terminal];
- if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
- next if ( $typel eq '?' );
- push @{ $left_chain_type{$typel} }, $il;
- $saw_chain_type{$typel} = 1;
- $count++;
- }
- if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
- next if ( $typer eq '?' );
- push @{ $right_chain_type{$typer} }, $ir;
- $saw_chain_type{$typer} = 1;
- $count++;
+ # Generalization for MOJO:
+ # Use the lowest level indentation of the tokens on the line.
+ # For example, here we can use the indentation of the ending ';':
+ # } until ($selection > 0 and $selection < 10); # ok to use ';'
+ # But this will not outdent if we use the terminal indentation:
+ # )->then( sub { # use indentation of the ->, not the {
+ # Warning: reduced_spaces_to_go[] may be a reference, do not
+ # do numerical checks with it
+
+ my $i_ind = $ibeg;
+ $indentation = $reduced_spaces_to_go[$i_ind];
+ $lev = $levels_to_go[$i_ind];
+ while ( $i_ind < $i_terminal ) {
+ $i_ind++;
+ if ( $levels_to_go[$i_ind] < $lev ) {
+ $indentation = $reduced_spaces_to_go[$i_ind];
+ $lev = $levels_to_go[$i_ind];
+ }
+ }
}
- }
- return unless $count;
- # now look for any interior tokens of the same types
- $count = 0;
- for my $n ( 0 .. $nmax ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- foreach my $i ( $il + 1 .. $ir - 1 ) {
- my $type = $types_to_go[$i];
- $type = '+' if ( $type eq '-' );
- $type = '*' if ( $type eq '/' );
- if ( $saw_chain_type{$type} ) {
- push @{ $interior_chain_type{$type} }, $i;
- $count++;
+ # handle indented closing token which aligns with opening token
+ elsif ( $adjust_indentation == 2 ) {
+
+ # handle option to align closing token with opening token
+ $lev = $levels_to_go[$ibeg];
+
+ # calculate spaces needed to align with opening token
+ my $space_count =
+ get_spaces($opening_indentation) + $opening_offset;
+
+ # Indent less than the previous line.
+ #
+ # Problem: For -lp we don't exactly know what it was if there
+ # were recoverable spaces sent to the aligner. A good solution
+ # would be to force a flush of the vertical alignment buffer, so
+ # that we would know. For now, this rule is used for -lp:
+ #
+ # When the last line did not start with a closing token we will
+ # be optimistic that the aligner will recover everything wanted.
+ #
+ # This rule will prevent us from breaking a hierarchy of closing
+ # tokens, and in a worst case will leave a closing paren too far
+ # indented, but this is better than frequently leaving it not
+ # indented enough.
+ my $last_spaces = get_spaces($last_indentation_written);
+ if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
+ $last_spaces +=
+ get_recoverable_spaces($last_indentation_written);
+ }
+
+ # reset the indentation to the new space count if it works
+ # only options are all or none: nothing in-between looks good
+ $lev = $levels_to_go[$ibeg];
+ if ( $space_count < $last_spaces ) {
+ if ($rOpts_line_up_parentheses) {
+ my $lev = $levels_to_go[$ibeg];
+ $indentation =
+ new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
+ }
+ else {
+ $indentation = $space_count;
+ }
+ }
+
+ # revert to default if it doesn't work
+ else {
+ $space_count = leading_spaces_to_go($ibeg);
+ if ( $default_adjust_indentation == 0 ) {
+ $indentation = $leading_spaces_to_go[$ibeg];
+ }
+ elsif ( $default_adjust_indentation == 1 ) {
+ $indentation = $reduced_spaces_to_go[$i_terminal];
+ $lev = $levels_to_go[$i_terminal];
+ }
}
}
- }
- return unless $count;
- # now make a list of all new break points
- my @insert_list;
+ # Full indentaion of closing tokens (-icb and -icp or -cti=2)
+ else {
+
+ # handle -icb (indented closing code block braces)
+ # Updated method for indented block braces: indent one full level if
+ # there is no continuation indentation. This will occur for major
+ # structures such as sub, if, else, but not for things like map
+ # blocks.
+ #
+ # Note: only code blocks without continuation indentation are
+ # handled here (if, else, unless, ..). In the following snippet,
+ # the terminal brace of the sort block will have continuation
+ # indentation as shown so it will not be handled by the coding
+ # here. We would have to undo the continuation indentation to do
+ # this, but it probably looks ok as is. This is a possible future
+ # update for semicolon terminated lines.
+ #
+ # if ($sortby eq 'date' or $sortby eq 'size') {
+ # @files = sort {
+ # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
+ # or $a cmp $b
+ # } @files;
+ # }
+ #
+ if ( $block_type_to_go[$ibeg]
+ && $ci_levels_to_go[$i_terminal] == 0 )
+ {
+ my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
+ $indentation = $spaces + $rOpts_indent_columns;
- # loop over all chain types
- foreach my $type ( keys %saw_chain_type ) {
+ # NOTE: for -lp we could create a new indentation object, but
+ # there is probably no need to do it
+ }
- # quit if just ONE continuation line with leading . For example--
- # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
- # . $contents;
- last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
+ # handle -icp and any -icb block braces which fall through above
+ # test such as the 'sort' block mentioned above.
+ else {
- # loop over all interior chain tokens
- foreach my $itest ( @{ $interior_chain_type{$type} } ) {
+ # There are currently two ways to handle -icp...
+ # One way is to use the indentation of the previous line:
+ # $indentation = $last_indentation_written;
- # loop over all left end tokens of same type
- if ( $left_chain_type{$type} ) {
- next if $nobreak_to_go[ $itest - 1 ];
- foreach my $i ( @{ $left_chain_type{$type} } ) {
- next unless $self->in_same_container_i( $i, $itest );
- push @insert_list, $itest - 1;
+ # The other way is to use the indentation that the previous line
+ # would have had if it hadn't been adjusted:
+ $indentation = $last_unadjusted_indentation;
- # Break at matching ? if this : is at a different level.
- # For example, the ? before $THRf_DEAD in the following
- # should get a break if its : gets a break.
- #
- # my $flags =
- # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
- # : ( $_ & 4 ) ? $THRf_R_DETACHED
- # : $THRf_R_JOINABLE;
- if ( $type eq ':'
- && $levels_to_go[$i] != $levels_to_go[$itest] )
- {
- my $i_question = $mate_index_to_go[$itest];
- if ( $i_question > 0 ) {
- push @insert_list, $i_question - 1;
- }
- }
- last;
+ # Current method: use the minimum of the two. This avoids
+ # inconsistent indentation.
+ if ( get_spaces($last_indentation_written) <
+ get_spaces($indentation) )
+ {
+ $indentation = $last_indentation_written;
}
}
- # loop over all right end tokens of same type
- if ( $right_chain_type{$type} ) {
- next if $nobreak_to_go[$itest];
- foreach my $i ( @{ $right_chain_type{$type} } ) {
- next unless $self->in_same_container_i( $i, $itest );
- push @insert_list, $itest;
+ # use previous indentation but use own level
+ # to cause list to be flushed properly
+ $lev = $levels_to_go[$ibeg];
+ }
- # break at matching ? if this : is at a different level
- if ( $type eq ':'
- && $levels_to_go[$i] != $levels_to_go[$itest] )
- {
- my $i_question = $mate_index_to_go[$itest];
- if ( $i_question >= 0 ) {
- push @insert_list, $i_question;
- }
- }
- last;
- }
+ # remember indentation except for multi-line quotes, which get
+ # no indentation
+ unless ( $ibeg == 0 && $starting_in_quote ) {
+ $last_indentation_written = $indentation;
+ $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
+ $last_leading_token = $tokens_to_go[$ibeg];
+ }
+
+ # be sure lines with leading closing tokens are not outdented more
+ # than the line which contained the corresponding opening token.
+
+ #############################################################
+ # updated per bug report in alex_bug.pl: we must not
+ # mess with the indentation of closing logical braces so
+ # we must treat something like '} else {' as if it were
+ # an isolated brace
+ #############################################################
+ my $is_isolated_block_brace = $block_type_to_go[$ibeg]
+ && ( $i_terminal == $ibeg
+ || $is_if_elsif_else_unless_while_until_for_foreach{
+ $block_type_to_go[$ibeg] } );
+
+ # only do this for a ':; which is aligned with its leading '?'
+ my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
+
+ if (
+ defined($opening_indentation)
+ && !$leading_paren_arrow # MOJO
+ && !$is_isolated_block_brace
+ && !$is_unaligned_colon
+ )
+ {
+ if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
+ $indentation = $opening_indentation;
}
}
- }
- # insert any new break points
- if (@insert_list) {
- $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
- }
- return;
-}
+ # remember the indentation of each line of this batch
+ push @{$rindentation_list}, $indentation;
-sub break_equals {
+ # outdent lines with certain leading tokens...
+ if (
- # Look for assignment operators that could use a breakpoint.
- # For example, in the following snippet
- #
- # $HOME = $ENV{HOME}
- # || $ENV{LOGDIR}
- # || $pw[7]
- # || die "no home directory for user $<";
- #
- # we could break at the = to get this, which is a little nicer:
- # $HOME =
- # $ENV{HOME}
- # || $ENV{LOGDIR}
- # || $pw[7]
- # || die "no home directory for user $<";
- #
- # The logic here follows the logic in set_logical_padding, which
- # will add the padding in the second line to improve alignment.
- #
- my ( $self, $ri_left, $ri_right ) = @_;
- my $nmax = @{$ri_right} - 1;
- return unless ( $nmax >= 2 );
+ # must be first word of this batch
+ $ibeg == 0
- # scan the left ends of first two lines
- my $tokbeg = "";
- my $depth_beg;
- for my $n ( 1 .. 2 ) {
- my $il = $ri_left->[$n];
- my $typel = $types_to_go[$il];
- my $tokenl = $tokens_to_go[$il];
+ # and ...
+ && (
- my $has_leading_op = ( $tokenl =~ /^\w/ )
- ? $is_chain_operator{$tokenl} # + - * / : ? && ||
- : $is_chain_operator{$typel}; # and, or
- return unless ($has_leading_op);
- if ( $n > 1 ) {
- return
- unless ( $tokenl eq $tokbeg
- && $nesting_depth_to_go[$il] eq $depth_beg );
- }
- $tokbeg = $tokenl;
- $depth_beg = $nesting_depth_to_go[$il];
- }
+ # certain leading keywords if requested
+ (
+ $rOpts->{'outdent-keywords'}
+ && $types_to_go[$ibeg] eq 'k'
+ && $outdent_keyword{ $tokens_to_go[$ibeg] }
+ )
- # now look for any interior tokens of the same types
- my $il = $ri_left->[0];
- my $ir = $ri_right->[0];
+ # or labels if requested
+ || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
- # now make a list of all new break points
- my @insert_list;
- for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
- my $type = $types_to_go[$i];
- if ( $is_assignment{$type}
- && $nesting_depth_to_go[$i] eq $depth_beg )
- {
- if ( $want_break_before{$type} ) {
- push @insert_list, $i - 1;
- }
- else {
- push @insert_list, $i;
- }
- }
- }
+ # or static block comments if requested
+ || ( $types_to_go[$ibeg] eq '#'
+ && $rOpts->{'outdent-static-block-comments'}
+ && $is_static_block_comment )
+ )
+ )
- # Break after a 'return' followed by a chain of operators
- # return ( $^O !~ /win32|dos/i )
- # && ( $^O ne 'VMS' )
- # && ( $^O ne 'OS2' )
- # && ( $^O ne 'MacOS' );
- # To give:
- # return
- # ( $^O !~ /win32|dos/i )
- # && ( $^O ne 'VMS' )
- # && ( $^O ne 'OS2' )
- # && ( $^O ne 'MacOS' );
- my $i = 0;
- if ( $types_to_go[$i] eq 'k'
- && $tokens_to_go[$i] eq 'return'
- && $ir > $il
- && $nesting_depth_to_go[$i] eq $depth_beg )
- {
- push @insert_list, $i;
- }
+ {
+ my $space_count = leading_spaces_to_go($ibeg);
+ if ( $space_count > 0 ) {
+ $space_count -= $rOpts_continuation_indentation;
+ $is_outdented_line = 1;
+ if ( $space_count < 0 ) { $space_count = 0 }
- return unless (@insert_list);
+ # do not promote a spaced static block comment to non-spaced;
+ # this is not normally necessary but could be for some
+ # unusual user inputs (such as -ci = -i)
+ if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
+ $space_count = 1;
+ }
- # One final check...
- # scan second and third lines and be sure there are no assignments
- # we want to avoid breaking at an = to make something like this:
- # unless ( $icon =
- # $html_icons{"$type-$state"}
- # or $icon = $html_icons{$type}
- # or $icon = $html_icons{$state} )
- for my $n ( 1 .. 2 ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- foreach my $i ( $il + 1 .. $ir ) {
- my $type = $types_to_go[$i];
- return
- if ( $is_assignment{$type}
- && $nesting_depth_to_go[$i] eq $depth_beg );
+ if ($rOpts_line_up_parentheses) {
+ $indentation =
+ new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
+ }
+ else {
+ $indentation = $space_count;
+ }
+ }
}
- }
- # ok, insert any new break point
- if (@insert_list) {
- $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ return ( $indentation, $lev, $level_end, $terminal_type,
+ $terminal_block_type, $is_semicolon_terminated,
+ $is_outdented_line );
}
- return;
-}
+} ## end closure set_adjusted_indentation
-sub is_list {
+sub get_opening_indentation {
- # Return true if the immediate contents of a container appears to be a
- # list.
+ # get the indentation of the line which output the opening token
+ # corresponding to a given closing token in the current output batch.
+ #
+ # given:
+ # $i_closing - index in this line of a closing token ')' '}' or ']'
+ #
+ # $ri_first - reference to list of the first index $i for each output
+ # line in this batch
+ # $ri_last - reference to list of the last index $i for each output line
+ # in this batch
+ # $rindentation_list - reference to a list containing the indentation
+ # used for each line.
+ #
+ # return:
+ # -the indentation of the line which contained the opening token
+ # which matches the token at index $i_opening
+ # -and its offset (number of columns) from the start of the line
+ #
+ my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
- my ( $self, $seqno ) = @_;
- return unless defined($seqno);
+ # first, see if the opening token is in the current batch
+ my $i_opening = $mate_index_to_go[$i_closing];
+ my ( $indent, $offset, $is_leading, $exists );
+ $exists = 1;
+ if ( $i_opening >= 0 ) {
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_opening = $K_opening_container->{$seqno};
- return unless ( defined($K_opening) );
+ # it is..look up the indentation
+ ( $indent, $offset, $is_leading ) =
+ lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
+ $rindentation_list );
+ }
- my $rLL = $self->[_rLL_];
- my $block_type = $rLL->[$K_opening]->[_BLOCK_TYPE_];
- return if ($block_type);
+ # if not, it should have been stored in the hash by a previous batch
+ else {
+ ( $indent, $offset, $is_leading, $exists ) =
+ get_saved_opening_indentation( $type_sequence_to_go[$i_closing] );
+ }
+ return ( $indent, $offset, $is_leading, $exists );
+}
- my $token = $rLL->[$K_opening]->[_TOKEN_];
- return if ( $token eq ':' );
+sub set_vertical_tightness_flags {
- # We will require at least 2 commas or 1 fat comma in the
- # immediate lower level.
- my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
- my $fat_comma_count = $rtype_count_by_seqno->{$seqno}->{'=>'};
- my $comma_count = $rtype_count_by_seqno->{$seqno}->{','};
- my $is_list = ( $fat_comma_count || $comma_count && $comma_count > 1 );
- return $is_list;
-}
+ my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last,
+ $ending_in_quote, $closing_side_comment )
+ = @_;
-sub insert_breaks_before_list_opening_containers {
+ # Define vertical tightness controls for the nth line of a batch.
+ # We create an array of parameters which tell the vertical aligner
+ # if we should combine this line with the next line to achieve the
+ # desired vertical tightness. The array of parameters contains:
+ #
+ # [0] type: 1=opening non-block 2=closing non-block
+ # 3=opening block brace 4=closing block brace
+ #
+ # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
+ # if closing: spaces of padding to use
+ # [2] sequence number of container
+ # [3] valid flag: do not append if this flag is false. Will be
+ # true if appropriate -vt flag is set. Otherwise, Will be
+ # made true only for 2 line container in parens with -lp
+ #
+ # These flags are used by sub set_leading_whitespace in
+ # the vertical aligner
- my ( $self, $ri_left, $ri_right ) = @_;
+ my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
- # This routine is called once per batch to implement the parameters
- # --break-before-hash-brace, etc.
+ my $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
+ my $rOpts_block_brace_vertical_tightness =
+ $rOpts->{'block-brace-vertical-tightness'};
+ my $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
- # Nothing to do if none of these parameters has been set
- return unless %break_before_container_types;
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1:
+ # Handle Lines 1 .. n-1 but not the last line
+ # For non-BLOCK tokens, we will need to examine the next line
+ # too, so we won't consider the last line.
+ #--------------------------------------------------------------
+ if ( $n < $n_last_line ) {
- my $nmax = @{$ri_right} - 1;
- return unless ( $nmax >= 0 );
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1a:
+ # Look for Type 1, last token of this line is a non-block opening token
+ #--------------------------------------------------------------
+ my $ibeg_next = $ri_first->[ $n + 1 ];
+ my $token_end = $tokens_to_go[$iend];
+ my $iend_next = $ri_last->[ $n + 1 ];
+ if (
+ $type_sequence_to_go[$iend]
+ && !$block_type_to_go[$iend]
+ && $is_opening_token{$token_end}
+ && (
+ $opening_vertical_tightness{$token_end} > 0
- my $rLL = $self->[_rLL_];
- my $ris_broken_container = $self->[_ris_broken_container_];
- my $rhas_broken_container = $self->[_rhas_broken_container_];
- my $rparent_of_seqno = $self->[_rparent_of_seqno_];
+ # allow 2-line method call to be closed up
+ || ( $rOpts_line_up_parentheses
+ && $token_end eq '('
+ && $iend > $ibeg
+ && $types_to_go[ $iend - 1 ] ne 'b' )
+ )
+ )
+ {
- # scan the ends of all lines
- my @insert_list;
- for my $n ( 0 .. $nmax ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- next unless ( $ir > $il );
- my $Kl = $K_to_go[$il];
- my $Kr = $K_to_go[$ir];
- my $Kend = $Kr;
- my $iend = $ir;
- my $type_end = $rLL->[$Kr]->[_TYPE_];
+ # avoid multiple jumps in nesting depth in one line if
+ # requested
+ my $ovt = $opening_vertical_tightness{$token_end};
+ my $iend_next = $ri_last->[ $n + 1 ];
+ unless (
+ $ovt < 2
+ && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
+ $nesting_depth_to_go[$ibeg_next] )
+ )
+ {
- # Backup before any side comment
- if ( $type_end eq '#' ) {
- $Kend = $self->K_previous_nonblank($Kr);
- next unless defined($Kend);
- $type_end = $rLL->[$Kend]->[_TYPE_];
- $iend = $ir + ( $Kend - $Kr );
+ # If -vt flag has not been set, mark this as invalid
+ # and aligner will validate it if it sees the closing paren
+ # within 2 lines.
+ my $valid_flag = $ovt;
+ @{$rvertical_tightness_flags} =
+ ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
+ }
}
- next unless ( $Kl < $Kend - 1 );
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1b:
+ # Look for Type 2, first token of next line is a non-block closing
+ # token .. and be sure this line does not have a side comment
+ #--------------------------------------------------------------
+ my $token_next = $tokens_to_go[$ibeg_next];
+ if ( $type_sequence_to_go[$ibeg_next]
+ && !$block_type_to_go[$ibeg_next]
+ && $is_closing_token{$token_next}
+ && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen!
+ {
+ my $ovt = $opening_vertical_tightness{$token_next};
+ my $cvt = $closing_vertical_tightness{$token_next};
+ if (
- my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_];
- next unless ( defined($seqno) );
+ # never append a trailing line like )->pack(
+ # because it will throw off later alignment
+ (
+ $nesting_depth_to_go[$ibeg_next] ==
+ $nesting_depth_to_go[ $iend_next + 1 ] + 1
+ )
+ && (
+ $cvt == 2
+ || (
+ $container_environment_to_go[$ibeg_next] ne 'LIST'
+ && (
+ $cvt == 1
- # Only for types of container tokens with a non-default break option
- my $token_end = $rLL->[$Kend]->[_TOKEN_];
- my $break_option = $break_before_container_types{$token_end};
- next unless ($break_option);
+ # allow closing up 2-line method calls
+ || ( $rOpts_line_up_parentheses
+ && $token_next eq ')' )
+ )
+ )
+ )
+ )
+ {
- # Require previous nonblank to be certain types (= and =>)
- # Note similar coding in sub adjust_container_indentation
- my $Kprev = $Kend - 1;
- my $prev_type = $rLL->[$Kprev]->[_TYPE_];
- if ( $prev_type eq 'b' ) {
- $Kprev--;
- next if ( $Kprev <= $Kl );
- $prev_type = $rLL->[$Kprev]->[_TYPE_];
+ # decide which trailing closing tokens to append..
+ my $ok = 0;
+ if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
+ else {
+ my $str = join( '',
+ @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
+
+ # append closing token if followed by comment or ';'
+ if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
+ }
+
+ if ($ok) {
+ my $valid_flag = $cvt;
+ @{$rvertical_tightness_flags} = (
+ 2,
+ $tightness{$token_next} == 2 ? 0 : 1,
+ $type_sequence_to_go[$ibeg_next], $valid_flag,
+ );
+ }
+ }
}
- next unless ( $is_equal_or_fat_comma{$prev_type} );
- # This must be a list (this will exclude all code blocks)
- next unless $self->is_list($seqno);
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1c:
+ # Implement the Opening Token Right flag (Type 2)..
+ # If requested, move an isolated trailing opening token to the end of
+ # the previous line which ended in a comma. We could do this
+ # in sub recombine_breakpoints but that would cause problems
+ # with -lp formatting. The problem is that indentation will
+ # quickly move far to the right in nested expressions. By
+ # doing it after indentation has been set, we avoid changes
+ # to the indentation. Actual movement of the token takes place
+ # in sub valign_output_step_B.
+ #--------------------------------------------------------------
+ if (
+ $opening_token_right{ $tokens_to_go[$ibeg_next] }
+
+ # previous line is not opening
+ # (use -sot to combine with it)
+ && !$is_opening_token{$token_end}
- # Never break a weld
- next if ( $self->weld_len_left( $seqno, $token_end ) );
+ # previous line ended in one of these
+ # (add other cases if necessary; '=>' and '.' are not necessary
+ && !$block_type_to_go[$ibeg_next]
- # Final decision is based on selected option:
+ # this is a line with just an opening token
+ && ( $iend_next == $ibeg_next
+ || $iend_next == $ibeg_next + 2
+ && $types_to_go[$iend_next] eq '#' )
- # Option 1 = stable, try to follow input
- my $ok_to_break;
- if ( $break_option == 1 ) {
- if ( $ir - 2 > $il ) {
- $ok_to_break = $old_breakpoint_to_go[ $ir - 2 ];
- }
+ # looks bad if we align vertically with the wrong container
+ && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
+ )
+ {
+ my $valid_flag = 1;
+ my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
+ @{$rvertical_tightness_flags} =
+ ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
}
- # Option 2 = only if complex list, meaning:
- # - this list contains a broken container, or
- # - this list is contained in a broken list
- elsif ( $break_option == 2 ) {
- $ok_to_break = $rhas_broken_container->{$seqno};
- if ( !$ok_to_break ) {
- my $parent = $rparent_of_seqno->{$seqno};
- $ok_to_break = $self->is_list($parent);
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1d:
+ # Stacking of opening and closing tokens (Type 2)
+ #--------------------------------------------------------------
+ my $stackable;
+ my $token_beg_next = $tokens_to_go[$ibeg_next];
+
+ # patch to make something like 'qw(' behave like an opening paren
+ # (aran.t)
+ if ( $types_to_go[$ibeg_next] eq 'q' ) {
+ if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
+ $token_beg_next = $1;
}
}
- # Option 3 = always break
- elsif ( $break_option == 3 ) {
- $ok_to_break = 1;
+ if ( $is_closing_token{$token_end}
+ && $is_closing_token{$token_beg_next} )
+ {
+ $stackable = $stack_closing_token{$token_beg_next}
+ unless ( $block_type_to_go[$ibeg_next] )
+ ; # shouldn't happen; just checking
}
-
- # Shouldn't happen! Bad flag, but make behavior same as 3
- else {
- $ok_to_break = 1;
+ elsif ($is_opening_token{$token_end}
+ && $is_opening_token{$token_beg_next} )
+ {
+ $stackable = $stack_opening_token{$token_beg_next}
+ unless ( $block_type_to_go[$ibeg_next] )
+ ; # shouldn't happen; just checking
}
- next unless ($ok_to_break);
+ if ($stackable) {
- # This meets the criteria, so install a break before the opening token.
- my $Kbreak = $self->K_previous_nonblank($Kend);
- my $ibreak = $Kbreak - $Kl + $il;
- next if ( $ibreak < $il );
- next if ( $nobreak_to_go[$ibreak] );
- push @insert_list, $ibreak;
+ my $is_semicolon_terminated;
+ if ( $n + 1 == $n_last_line ) {
+ my ( $terminal_type, $i_terminal ) =
+ $self->terminal_type_i( $ibeg_next, $iend_next );
+ $is_semicolon_terminated = $terminal_type eq ';'
+ && $nesting_depth_to_go[$iend_next] <
+ $nesting_depth_to_go[$ibeg_next];
+ }
+ # this must be a line with just an opening token
+ # or end in a semicolon
+ if (
+ $is_semicolon_terminated
+ || ( $iend_next == $ibeg_next
+ || $iend_next == $ibeg_next + 2
+ && $types_to_go[$iend_next] eq '#' )
+ )
+ {
+ my $valid_flag = 1;
+ my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
+ @{$rvertical_tightness_flags} =
+ ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
+ );
+ }
+ }
}
- # insert any new break points
- if (@insert_list) {
- $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 2:
+ # Handle type 3, opening block braces on last line of the batch
+ # Check for a last line with isolated opening BLOCK curly
+ #--------------------------------------------------------------
+ elsif ($rOpts_block_brace_vertical_tightness
+ && $ibeg eq $iend
+ && $types_to_go[$iend] eq '{'
+ && $block_type_to_go[$iend] =~
+ /$block_brace_vertical_tightness_pattern/ )
+ {
+ @{$rvertical_tightness_flags} =
+ ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
}
- return;
+
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 3:
+ # Handle type 4, a closing block brace on the last line of the batch Check
+ # for a last line with isolated closing BLOCK curly
+ # Patch: added a check for any new closing side comment which the
+ # -csc option may generate. If it exists, there will be a side comment
+ # so we cannot combine with a brace on the next line. This issue
+ # occurs for the combination -scbb and -csc is used.
+ #--------------------------------------------------------------
+ elsif ($rOpts_stack_closing_block_brace
+ && $ibeg eq $iend
+ && $block_type_to_go[$iend]
+ && $types_to_go[$iend] eq '}'
+ && ( !$closing_side_comment || $n < $n_last_line ) )
+ {
+ my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
+ @{$rvertical_tightness_flags} =
+ ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
+ }
+
+ # pack in the sequence numbers of the ends of this line
+ $rvertical_tightness_flags->[4] =
+ $self->get_seqno( $ibeg, $ending_in_quote );
+ $rvertical_tightness_flags->[5] =
+ $self->get_seqno( $iend, $ending_in_quote );
+ return $rvertical_tightness_flags;
}
-sub insert_final_ternary_breaks {
+##########################################################
+# CODE SECTION 14: Code for creating closing side comments
+##########################################################
- my ( $self, $ri_left, $ri_right ) = @_;
+{ ## begin closure accumulate_csc_text
- # Called once per batch to look for and do any final line breaks for
- # long ternary chains
+# These routines are called once per batch when the --closing-side-comments flag
+# has been set.
- my $nmax = @{$ri_right} - 1;
+ my %block_leading_text;
+ my %block_opening_line_number;
+ my $csc_new_statement_ok;
+ my $csc_last_label;
+ my %csc_block_label;
+ my $accumulating_text_for_block;
+ my $leading_block_text;
+ my $rleading_block_if_elsif_text;
+ my $leading_block_text_level;
+ my $leading_block_text_length_exceeded;
+ my $leading_block_text_line_length;
+ my $leading_block_text_line_number;
- # scan the left and right end tokens of all lines
- my $count = 0;
- my $i_first_colon = -1;
- for my $n ( 0 .. $nmax ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- my $typel = $types_to_go[$il];
- my $typer = $types_to_go[$ir];
- return if ( $typel eq '?' );
- return if ( $typer eq '?' );
- if ( $typel eq ':' ) { $i_first_colon = $il; last; }
- elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
+ sub initialize_csc_vars {
+ %block_leading_text = ();
+ %block_opening_line_number = ();
+ $csc_new_statement_ok = 1;
+ $csc_last_label = "";
+ %csc_block_label = ();
+ $rleading_block_if_elsif_text = [];
+ $accumulating_text_for_block = "";
+ reset_block_text_accumulator();
+ return;
}
- # For long ternary chains,
- # if the first : we see has its ? is in the interior
- # of a preceding line, then see if there are any good
- # breakpoints before the ?.
- if ( $i_first_colon > 0 ) {
- my $i_question = $mate_index_to_go[$i_first_colon];
- if ( $i_question > 0 ) {
- my @insert_list;
- for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
- my $token = $tokens_to_go[$ii];
- my $type = $types_to_go[$ii];
+ sub reset_block_text_accumulator {
- # For now, a good break is either a comma or,
- # in a long chain, a 'return'.
- # Patch for RT #126633: added the $nmax>1 check to avoid
- # breaking after a return for a simple ternary. For longer
- # chains the break after return allows vertical alignment, so
- # it is still done. So perltidy -wba='?' will not break
- # immediately after the return in the following statement:
- # sub x {
- # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
- # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
- # }
- if (
- (
- $type eq ','
- || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
- )
- && $self->in_same_container_i( $ii, $i_question )
- )
- {
- push @insert_list, $ii;
- last;
- }
- }
+ # save text after 'if' and 'elsif' to append after 'else'
+ if ($accumulating_text_for_block) {
- # insert any new break points
- if (@insert_list) {
- $self->insert_additional_breaks( \@insert_list, $ri_left,
- $ri_right );
+ if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
+ push @{$rleading_block_if_elsif_text}, $leading_block_text;
}
}
+ $accumulating_text_for_block = "";
+ $leading_block_text = "";
+ $leading_block_text_level = 0;
+ $leading_block_text_length_exceeded = 0;
+ $leading_block_text_line_number = 0;
+ $leading_block_text_line_length = 0;
+ return;
}
- return;
-}
-sub in_same_container_i {
+ sub set_block_text_accumulator {
+ my ( $self, $i ) = @_;
+ $accumulating_text_for_block = $tokens_to_go[$i];
+ if ( $accumulating_text_for_block !~ /^els/ ) {
+ $rleading_block_if_elsif_text = [];
+ }
+ $leading_block_text = "";
+ $leading_block_text_level = $levels_to_go[$i];
+ $leading_block_text_line_number = $self->get_output_line_number();
+ $leading_block_text_length_exceeded = 0;
- # check to see if tokens at i1 and i2 are in the
- # same container, and not separated by a comma, ? or :
- # This is an interface between the _to_go arrays to the rLL array
- my ( $self, $i1, $i2 ) = @_;
- return $self->in_same_container_K( $K_to_go[$i1], $K_to_go[$i2] );
-}
+ # this will contain the column number of the last character
+ # of the closing side comment
+ $leading_block_text_line_length =
+ length($csc_last_label) +
+ length($accumulating_text_for_block) +
+ length( $rOpts->{'closing-side-comment-prefix'} ) +
+ $leading_block_text_level * $rOpts_indent_columns + 3;
+ return;
+ }
-{ ## begin closure in_same_container_K
- my $ris_break_token;
- my $ris_comma_token;
+ sub accumulate_block_text {
+ my ( $self, $i ) = @_;
- BEGIN {
+ # accumulate leading text for -csc, ignoring any side comments
+ if ( $accumulating_text_for_block
+ && !$leading_block_text_length_exceeded
+ && $types_to_go[$i] ne '#' )
+ {
- # all cases break on seeing commas at same level
- my @q = qw( => );
- push @q, ',';
- @{$ris_comma_token}{@q} = (1) x scalar(@q);
+ my $added_length = $token_lengths_to_go[$i];
+ $added_length += 1 if $i == 0;
+ my $new_line_length =
+ $leading_block_text_line_length + $added_length;
- # Non-ternary text also breaks on seeing any of qw(? : || or )
- # Example: we would not want to break at any of these .'s
- # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
- push @q, qw( or || ? : );
- @{$ris_break_token}{@q} = (1) x scalar(@q);
- }
+ # we can add this text if we don't exceed some limits..
+ if (
- sub in_same_container_K {
+ # we must not have already exceeded the text length limit
+ length($leading_block_text) <
+ $rOpts_closing_side_comment_maximum_text
- # Check to see if tokens at K1 and K2 are in the same container,
- # and not separated by certain characters: => , ? : || or
- # This version uses the newer $rLL data structure.
+ # and either:
+ # the new total line length must be below the line length limit
+ # or the new length must be below the text length limit
+ # (ie, we may allow one token to exceed the text length limit)
+ && (
+ $new_line_length <
+ maximum_line_length_for_level($leading_block_text_level)
- my ( $self, $K1, $K2 ) = @_;
- if ( $K2 < $K1 ) { ( $K1, $K2 ) = ( $K2, $K1 ) }
- my $rLL = $self->[_rLL_];
- my $depth_1 = $rLL->[$K1]->[_SLEVEL_];
- return if ( $depth_1 < 0 );
- return unless ( $rLL->[$K2]->[_SLEVEL_] == $depth_1 );
+ || length($leading_block_text) + $added_length <
+ $rOpts_closing_side_comment_maximum_text
+ )
- # Select character set to scan for
- my $type_1 = $rLL->[$K1]->[_TYPE_];
- my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
+ # UNLESS: we are adding a closing paren before the brace we seek.
+ # This is an attempt to avoid situations where the ... to be
+ # added are longer than the omitted right paren, as in:
- # Fast preliminary loop to verify that tokens are in the same container
- my $KK = $K1;
- while (1) {
- $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
- last if !defined($KK);
- last if ( $KK >= $K2 );
- my $depth_K = $rLL->[$KK]->[_SLEVEL_];
- return if ( $depth_K < $depth_1 );
- next if ( $depth_K > $depth_1 );
- if ( $type_1 ne ':' ) {
- my $tok_K = $rLL->[$KK]->[_TOKEN_];
- return if ( $tok_K eq '?' || $tok_K eq ':' );
- }
- }
+ # foreach my $item (@a_rather_long_variable_name_here) {
+ # &whatever;
+ # } ## end foreach my $item (@a_rather_long_variable_name_here...
- # Slow loop checking for certain characters
+ || (
+ $tokens_to_go[$i] eq ')'
+ && (
+ (
+ $i + 1 <= $max_index_to_go
+ && $block_type_to_go[ $i + 1 ] eq
+ $accumulating_text_for_block
+ )
+ || ( $i + 2 <= $max_index_to_go
+ && $block_type_to_go[ $i + 2 ] eq
+ $accumulating_text_for_block )
+ )
+ )
+ )
+ {
- ###########################################################
- # This is potentially a slow routine and not critical.
- # For safety just give up for large differences.
- # See test file 'infinite_loop.txt'
- ###########################################################
- return if ( $K2 - $K1 > 200 );
+ # add an extra space at each newline
+ if ( $i == 0 ) { $leading_block_text .= ' ' }
- foreach my $K ( $K1 + 1 .. $K2 - 1 ) {
+ # add the token text
+ $leading_block_text .= $tokens_to_go[$i];
+ $leading_block_text_line_length = $new_line_length;
+ }
- my $depth_K = $rLL->[$K]->[_SLEVEL_];
- next if ( $depth_K > $depth_1 );
- return if ( $depth_K < $depth_1 ); # redundant, checked above
- my $tok = $rLL->[$K]->[_TOKEN_];
- return if ( $rbreak->{$tok} );
+ # show that text was truncated if necessary
+ elsif ( $types_to_go[$i] ne 'b' ) {
+ $leading_block_text_length_exceeded = 1;
+ $leading_block_text .= '...';
+ }
}
- return 1;
+ return;
}
-} ## end closure in_same_container_K
-sub set_continuation_breaks {
-
- # Called once per batch to set breaks in long lines.
+ sub accumulate_csc_text {
- # Define an array of indexes for inserting newline characters to
- # keep the line lengths below the maximum desired length. There is
- # an implied break after the last token, so it need not be included.
+ my ($self) = @_;
- # Method:
- # This routine is part of series of routines which adjust line
- # lengths. It is only called if a statement is longer than the
- # maximum line length, or if a preliminary scanning located
- # desirable break points. Sub scan_list has already looked at
- # these tokens and set breakpoints (in array
- # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
- # after commas, after opening parens, and before closing parens).
- # This routine will honor these breakpoints and also add additional
- # breakpoints as necessary to keep the line length below the maximum
- # requested. It bases its decision on where the 'bond strength' is
- # lowest.
+ # called once per output buffer when -csc is used. Accumulates
+ # the text placed after certain closing block braces.
+ # Defines and returns the following for this buffer:
- # Output: returns references to the arrays:
- # @i_first
- # @i_last
- # which contain the indexes $i of the first and last tokens on each
- # line.
+ my $block_leading_text = ""; # the leading text of the last '}'
+ my $rblock_leading_if_elsif_text;
+ my $i_block_leading_text =
+ -1; # index of token owning block_leading_text
+ my $block_line_count = 100; # how many lines the block spans
+ my $terminal_type = 'b'; # type of last nonblank token
+ my $i_terminal = 0; # index of last nonblank token
+ my $terminal_block_type = "";
- # In addition, the array:
- # $forced_breakpoint_to_go[$i]
- # may be updated to be =1 for any index $i after which there must be
- # a break. This signals later routines not to undo the breakpoint.
+ # update most recent statement label
+ $csc_last_label = "" unless ($csc_last_label);
+ if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
+ my $block_label = $csc_last_label;
- my ( $self, $saw_good_break ) = @_;
- my $DEBUG_BREAKPOINTS = 0;
+ # Loop over all tokens of this batch
+ for my $i ( 0 .. $max_index_to_go ) {
+ my $type = $types_to_go[$i];
+ my $block_type = $block_type_to_go[$i];
+ my $token = $tokens_to_go[$i];
- my @i_first = (); # the first index to output
- my @i_last = (); # the last index to output
- my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
- if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
+ # remember last nonblank token type
+ if ( $type ne '#' && $type ne 'b' ) {
+ $terminal_type = $type;
+ $terminal_block_type = $block_type;
+ $i_terminal = $i;
+ }
- my $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
+ my $type_sequence = $type_sequence_to_go[$i];
+ if ( $block_type && $type_sequence ) {
- $self->set_bond_strengths();
+ if ( $token eq '}' ) {
- my $imin = 0;
- my $imax = $max_index_to_go;
- if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
- if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
- my $i_begin = $imin; # index for starting next iteration
+ # restore any leading text saved when we entered this block
+ if ( defined( $block_leading_text{$type_sequence} ) ) {
+ ( $block_leading_text, $rblock_leading_if_elsif_text )
+ = @{ $block_leading_text{$type_sequence} };
+ $i_block_leading_text = $i;
+ delete $block_leading_text{$type_sequence};
+ $rleading_block_if_elsif_text =
+ $rblock_leading_if_elsif_text;
+ }
- my $leading_spaces = leading_spaces_to_go($imin);
- my $line_count = 0;
- my $last_break_strength = NO_BREAK;
- my $i_last_break = -1;
- my $max_bias = 0.001;
- my $tiny_bias = 0.0001;
- my $leading_alignment_token = "";
- my $leading_alignment_type = "";
+ if ( defined( $csc_block_label{$type_sequence} ) ) {
+ $block_label = $csc_block_label{$type_sequence};
+ delete $csc_block_label{$type_sequence};
+ }
- # see if any ?/:'s are in order
- my $colons_in_order = 1;
- my $last_tok = "";
- my @colon_list = grep { /^[\?\:]$/ } @types_to_go[ 0 .. $max_index_to_go ];
- my $colon_count = @colon_list;
- foreach (@colon_list) {
- if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
- $last_tok = $_;
- }
+ # if we run into a '}' then we probably started accumulating
+ # at something like a trailing 'if' clause..no harm done.
+ if ( $accumulating_text_for_block
+ && $levels_to_go[$i] <= $leading_block_text_level )
+ {
+ my $lev = $levels_to_go[$i];
+ reset_block_text_accumulator();
+ }
- # This is a sufficient but not necessary condition for colon chain
- my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
+ if ( defined( $block_opening_line_number{$type_sequence} ) )
+ {
+ my $output_line_number =
+ $self->get_output_line_number();
+ $block_line_count =
+ $output_line_number -
+ $block_opening_line_number{$type_sequence} + 1;
+ delete $block_opening_line_number{$type_sequence};
+ }
+ else {
- #-------------------------------------------------------
- # BEGINNING of main loop to set continuation breakpoints
- # Keep iterating until we reach the end
- #-------------------------------------------------------
- while ( $i_begin <= $imax ) {
- my $lowest_strength = NO_BREAK;
- my $starting_sum = $summed_lengths_to_go[$i_begin];
- my $i_lowest = -1;
- my $i_test = -1;
- my $lowest_next_token = '';
- my $lowest_next_type = 'b';
- my $i_lowest_next_nonblank = -1;
+ # Error: block opening line undefined for this line..
+ # This shouldn't be possible, but it is not a
+ # significant problem.
+ }
+ }
- #-------------------------------------------------------
- # BEGINNING of inner loop to find the best next breakpoint
- #-------------------------------------------------------
- my $strength = NO_BREAK;
- for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
- my $type = $types_to_go[$i_test];
- my $token = $tokens_to_go[$i_test];
- my $next_type = $types_to_go[ $i_test + 1 ];
- my $next_token = $tokens_to_go[ $i_test + 1 ];
- my $i_next_nonblank = $inext_to_go[$i_test];
- my $next_nonblank_type = $types_to_go[$i_next_nonblank];
- my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
- my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
- my $maximum_line_length = maximum_line_length($i_begin);
+ elsif ( $token eq '{' ) {
- # adjustments to the previous bond strength may have been made, and
- # we must keep the bond strength of a token and its following blank
- # the same;
- my $last_strength = $strength;
- $strength = $bond_strength_to_go[$i_test];
- if ( $type eq 'b' ) { $strength = $last_strength }
+ my $line_number = $self->get_output_line_number();
+ $block_opening_line_number{$type_sequence} = $line_number;
- # use old breaks as a tie-breaker. For example to
- # prevent blinkers with -pbp in this code:
+ # set a label for this block, except for
+ # a bare block which already has the label
+ # A label can only be used on the next {
+ if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
+ $csc_block_label{$type_sequence} = $csc_last_label;
+ $csc_last_label = "";
-##@keywords{
-## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/}
-## = ();
+ if ( $accumulating_text_for_block
+ && $levels_to_go[$i] == $leading_block_text_level )
+ {
- # At the same time try to prevent a leading * in this code
- # with the default formatting:
- #
-## return
-## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 )
-## * ( $x**( $a - 1 ) )
-## * ( ( 1 - $x )**( $b - 1 ) );
+ if ( $accumulating_text_for_block eq $block_type ) {
- # reduce strength a bit to break ties at an old breakpoint ...
- if (
- $old_breakpoint_to_go[$i_test]
+ # save any leading text before we enter this block
+ $block_leading_text{$type_sequence} = [
+ $leading_block_text,
+ $rleading_block_if_elsif_text
+ ];
+ $block_opening_line_number{$type_sequence} =
+ $leading_block_text_line_number;
+ reset_block_text_accumulator();
+ }
+ else {
- # which is a 'good' breakpoint, meaning ...
- # we don't want to break before it
- && !$want_break_before{$type}
+ # shouldn't happen, but not a serious error.
+ # We were accumulating -csc text for block type
+ # $accumulating_text_for_block and unexpectedly
+ # encountered a '{' for block type $block_type.
+ }
+ }
+ }
+ }
- # and either we want to break before the next token
- # or the next token is not short (i.e. not a '*', '/' etc.)
- && $i_next_nonblank <= $imax
- && ( $want_break_before{$next_nonblank_type}
- || $token_lengths_to_go[$i_next_nonblank] > 2
- || $next_nonblank_type =~ /^[\,\(\[\{L]$/ )
- )
+ if ( $type eq 'k'
+ && $csc_new_statement_ok
+ && $is_if_elsif_else_unless_while_until_for_foreach{$token}
+ && $token =~ /$closing_side_comment_list_pattern/ )
{
- $strength -= $tiny_bias;
+ $self->set_block_text_accumulator($i);
}
-
- # otherwise increase strength a bit if this token would be at the
- # maximum line length. This is necessary to avoid blinking
- # in the above example when the -iob flag is added.
else {
- my $len =
- $leading_spaces +
- $summed_lengths_to_go[ $i_test + 1 ] -
- $starting_sum;
- if ( $len >= $maximum_line_length ) {
- $strength += $tiny_bias;
+
+ # note: ignoring type 'q' because of tricks being played
+ # with 'q' for hanging side comments
+ if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
+ $csc_new_statement_ok =
+ ( $block_type || $type eq 'J' || $type eq ';' );
+ }
+ if ( $type eq ';'
+ && $accumulating_text_for_block
+ && $levels_to_go[$i] == $leading_block_text_level )
+ {
+ reset_block_text_accumulator();
+ }
+ else {
+ $self->accumulate_block_text($i);
}
}
+ }
- my $must_break = 0;
+ # Treat an 'else' block specially by adding preceding 'if' and
+ # 'elsif' text. Otherwise, the 'end else' is not helpful,
+ # especially for cuddled-else formatting.
+ if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
+ $block_leading_text =
+ $self->make_else_csc_text( $i_terminal, $terminal_block_type,
+ $block_leading_text, $rblock_leading_if_elsif_text );
+ }
- # Force an immediate break at certain operators
- # with lower level than the start of the line,
- # unless we've already seen a better break.
- #
- ##############################################
- # Note on an issue with a preceding ?
- ##############################################
- # We don't include a ? in the above list, but there may
- # be a break at a previous ? if the line is long.
- # Because of this we do not want to force a break if
- # there is a previous ? on this line. For now the best way
- # to do this is to not break if we have seen a lower strength
- # point, which is probably a ?.
- #
- # Example of unwanted breaks we are avoiding at a '.' following a ?
- # from pod2html using perltidy -gnu:
- # )
- # ? "\n<A NAME=\""
- # . $value
- # . "\">\n$text</A>\n"
- # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
- if (
- (
- $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
- || ( $next_nonblank_type eq 'k'
- && $next_nonblank_token =~ /^(and|or)$/ )
- )
- && ( $nesting_depth_to_go[$i_begin] >
- $nesting_depth_to_go[$i_next_nonblank] )
- && ( $strength <= $lowest_strength )
- )
- {
- $self->set_forced_breakpoint($i_next_nonblank);
- }
+ # if this line ends in a label then remember it for the next pass
+ $csc_last_label = "";
+ if ( $terminal_type eq 'J' ) {
+ $csc_last_label = $tokens_to_go[$i_terminal];
+ }
- if (
+ return ( $terminal_type, $i_terminal, $i_block_leading_text,
+ $block_leading_text, $block_line_count, $block_label );
+ }
- # Try to put a break where requested by scan_list
- $forced_breakpoint_to_go[$i_test]
+ sub make_else_csc_text {
- # break between ) { in a continued line so that the '{' can
- # be outdented
- # See similar logic in scan_list which catches instances
- # where a line is just something like ') {'. We have to
- # be careful because the corresponding block keyword might
- # not be on the first line, such as 'for' here:
- #
- # eval {
- # for ("a") {
- # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
- # }
- # };
- #
- || (
- $line_count
- && ( $token eq ')' )
- && ( $next_nonblank_type eq '{' )
- && ($next_nonblank_block_type)
- && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
+ # create additional -csc text for an 'else' and optionally 'elsif',
+ # depending on the value of switch
+ #
+ # = 0 add 'if' text to trailing else
+ # = 1 same as 0 plus:
+ # add 'if' to 'elsif's if can fit in line length
+ # add last 'elsif' to trailing else if can fit in one line
+ # = 2 same as 1 but do not check if exceed line length
+ #
+ # $rif_elsif_text = a reference to a list of all previous closing
+ # side comments created for this if block
+ #
+ my ( $self, $i_terminal, $block_type, $block_leading_text,
+ $rif_elsif_text )
+ = @_;
+ my $csc_text = $block_leading_text;
- # RT #104427: Dont break before opening sub brace because
- # sub block breaks handled at higher level, unless
- # it looks like the preceding list is long and broken
- && !(
- $next_nonblank_block_type =~ /$ANYSUB_PATTERN/
- && ( $nesting_depth_to_go[$i_begin] ==
- $nesting_depth_to_go[$i_next_nonblank] )
- )
+ my $rOpts_closing_side_comment_else_flag =
+ $rOpts->{'closing-side-comment-else-flag'};
- && !$rOpts->{'opening-brace-always-on-right'}
- )
+ if ( $block_type eq 'elsif'
+ && $rOpts_closing_side_comment_else_flag == 0 )
+ {
+ return $csc_text;
+ }
- # There is an implied forced break at a terminal opening brace
- || ( ( $type eq '{' ) && ( $i_test == $imax ) )
- )
- {
+ my $count = @{$rif_elsif_text};
+ return $csc_text unless ($count);
- # Forced breakpoints must sometimes be overridden, for example
- # because of a side comment causing a NO_BREAK. It is easier
- # to catch this here than when they are set.
- if ( $strength < NO_BREAK - 1 ) {
- $strength = $lowest_strength - $tiny_bias;
- $must_break = 1;
- }
- }
+ my $if_text = '[ if' . $rif_elsif_text->[0];
+
+ # always show the leading 'if' text on 'else'
+ if ( $block_type eq 'else' ) {
+ $csc_text .= $if_text;
+ }
+
+ # see if that's all
+ if ( $rOpts_closing_side_comment_else_flag == 0 ) {
+ return $csc_text;
+ }
+
+ my $last_elsif_text = "";
+ if ( $count > 1 ) {
+ $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
+ if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
+ }
+
+ # tentatively append one more item
+ my $saved_text = $csc_text;
+ if ( $block_type eq 'else' ) {
+ $csc_text .= $last_elsif_text;
+ }
+ else {
+ $csc_text .= ' ' . $if_text;
+ }
- # quit if a break here would put a good terminal token on
- # the next line and we already have a possible break
- if (
- !$must_break
- && ( $next_nonblank_type =~ /^[\;\,]$/ )
- && (
- (
- $leading_spaces +
- $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
- $starting_sum
- ) > $maximum_line_length
- )
- )
- {
- last if ( $i_lowest >= 0 );
- }
+ # all done if no length checks requested
+ if ( $rOpts_closing_side_comment_else_flag == 2 ) {
+ return $csc_text;
+ }
- # Avoid a break which would strand a single punctuation
- # token. For example, we do not want to strand a leading
- # '.' which is followed by a long quoted string.
- # But note that we do want to do this with -extrude (l=1)
- # so please test any changes to this code on -extrude.
- if (
- !$must_break
- && ( $i_test == $i_begin )
- && ( $i_test < $imax )
- && ( $token eq $type )
- && (
- (
- $leading_spaces +
- $summed_lengths_to_go[ $i_test + 1 ] -
- $starting_sum
- ) < $maximum_line_length
- )
- )
- {
- $i_test = min( $imax, $inext_to_go[$i_test] );
- redo;
- }
+ # undo it if line length exceeded
+ my $length =
+ length($csc_text) +
+ length($block_type) +
+ length( $rOpts->{'closing-side-comment-prefix'} ) +
+ $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
+ if (
+ $length > maximum_line_length_for_level($leading_block_text_level) )
+ {
+ $csc_text = $saved_text;
+ }
+ return $csc_text;
+ }
+} ## end closure accumulate_csc_text
- if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
- {
+{ ## begin closure balance_csc_text
- # break at previous best break if it would have produced
- # a leading alignment of certain common tokens, and it
- # is different from the latest candidate break
- last
- if ($leading_alignment_type);
+ # Some additional routines for handling the --closing-side-comments option
- # Force at least one breakpoint if old code had good
- # break It is only called if a breakpoint is required or
- # desired. This will probably need some adjustments
- # over time. A goal is to try to be sure that, if a new
- # side comment is introduced into formatted text, then
- # the same breakpoints will occur. scbreak.t
- last
- if (
- $i_test == $imax # we are at the end
- && !get_forced_breakpoint_count()
- && $saw_good_break # old line had good break
- && $type =~ /^[#;\{]$/ # and this line ends in
- # ';' or side comment
- && $i_last_break < 0 # and we haven't made a break
- && $i_lowest >= 0 # and we saw a possible break
- && $i_lowest < $imax - 1 # (but not just before this ;)
- && $strength - $lowest_strength < 0.5 * WEAK # and it's good
- );
+ my %matching_char;
- # Do not skip past an important break point in a short final
- # segment. For example, without this check we would miss the
- # break at the final / in the following code:
- #
- # $depth_stop =
- # ( $tau * $mass_pellet * $q_0 *
- # ( 1. - exp( -$t_stop / $tau ) ) -
- # 4. * $pi * $factor * $k_ice *
- # ( $t_melt - $t_ice ) *
- # $r_pellet *
- # $t_stop ) /
- # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
- #
- if ( $line_count > 2
- && $i_lowest < $i_test
- && $i_test > $imax - 2
- && $nesting_depth_to_go[$i_begin] >
- $nesting_depth_to_go[$i_lowest]
- && $lowest_strength < $last_break_strength - .5 * WEAK )
- {
- # Make this break for math operators for now
- my $ir = $inext_to_go[$i_lowest];
- my $il = $iprev_to_go[$ir];
- last
- if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
- || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ );
- }
+ BEGIN {
+ %matching_char = (
+ '{' => '}',
+ '(' => ')',
+ '[' => ']',
+ '}' => '{',
+ ')' => '(',
+ ']' => '[',
+ );
+ }
- # Update the minimum bond strength location
- $lowest_strength = $strength;
- $i_lowest = $i_test;
- $lowest_next_token = $next_nonblank_token;
- $lowest_next_type = $next_nonblank_type;
- $i_lowest_next_nonblank = $i_next_nonblank;
- last if $must_break;
+ sub balance_csc_text {
- # set flags to remember if a break here will produce a
- # leading alignment of certain common tokens
- if ( $line_count > 0
- && $i_test < $imax
- && ( $lowest_strength - $last_break_strength <= $max_bias )
- )
- {
- my $i_last_end = $iprev_to_go[$i_begin];
- my $tok_beg = $tokens_to_go[$i_begin];
- my $type_beg = $types_to_go[$i_begin];
- if (
+ # Append characters to balance a closing side comment so that editors
+ # such as vim can correctly jump through code.
+ # Simple Example:
+ # input = ## end foreach my $foo ( sort { $b ...
+ # output = ## end foreach my $foo ( sort { $b ...})
- # check for leading alignment of certain tokens
- (
- $tok_beg eq $next_nonblank_token
- && $is_chain_operator{$tok_beg}
- && ( $type_beg eq 'k'
- || $type_beg eq $tok_beg )
- && $nesting_depth_to_go[$i_begin] >=
- $nesting_depth_to_go[$i_next_nonblank]
- )
+ # NOTE: This routine does not currently filter out structures within
+ # quoted text because the bounce algorithms in text editors do not
+ # necessarily do this either (a version of vim was checked and
+ # did not do this).
- || ( $tokens_to_go[$i_last_end] eq $token
- && $is_chain_operator{$token}
- && ( $type eq 'k' || $type eq $token )
- && $nesting_depth_to_go[$i_last_end] >=
- $nesting_depth_to_go[$i_test] )
- )
- {
- $leading_alignment_token = $next_nonblank_token;
- $leading_alignment_type = $next_nonblank_type;
- }
- }
- }
+ # Some complex examples which will cause trouble for some editors:
+ # while ( $mask_string =~ /\{[^{]*?\}/g ) {
+ # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
+ # if ( $1 eq '{' ) {
+ # test file test1/braces.pl has many such examples.
- my $too_long = ( $i_test >= $imax );
- if ( !$too_long ) {
- my $next_length =
- $leading_spaces +
- $summed_lengths_to_go[ $i_test + 2 ] -
- $starting_sum;
- $too_long = $next_length > $maximum_line_length;
+ my ($csc) = @_;
- # To prevent blinkers we will avoid leaving a token exactly at
- # the line length limit unless it is the last token or one of
- # several "good" types.
- #
- # The following code was a blinker with -pbp before this
- # modification:
-## $last_nonblank_token eq '('
-## && $is_indirect_object_taker{ $paren_type
-## [$paren_depth] }
- # The issue causing the problem is that if the
- # term [$paren_depth] gets broken across a line then
- # the whitespace routine doesn't see both opening and closing
- # brackets and will format like '[ $paren_depth ]'. This
- # leads to an oscillation in length depending if we break
- # before the closing bracket or not.
- if ( !$too_long
- && $i_test + 1 < $imax
- && $next_nonblank_type !~ /^[,\}\]\)R]$/ )
- {
- $too_long = $next_length >= $maximum_line_length;
- }
- }
+ # loop to examine characters one-by-one, RIGHT to LEFT and
+ # build a balancing ending, LEFT to RIGHT.
+ for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
- $DEBUG_BREAKPOINTS
- && do {
- my $ltok = $token;
- my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
- my $i_testp2 = $i_test + 2;
- if ( $i_testp2 > $max_index_to_go + 1 ) {
- $i_testp2 = $max_index_to_go + 1;
- }
- if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
- if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
- print STDOUT
-"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
- };
+ my $char = substr( $csc, $pos, 1 );
- # allow one extra terminal token after exceeding line length
- # if it would strand this token.
- if ( $rOpts_fuzzy_line_length
- && $too_long
- && $i_lowest == $i_test
- && $token_lengths_to_go[$i_test] > 1
- && $next_nonblank_type =~ /^[\;\,]$/ )
- {
- $too_long = 0;
- }
+ # ignore everything except structural characters
+ next unless ( $matching_char{$char} );
- last
- if (
- ( $i_test == $imax ) # we're done if no more tokens,
- || (
- ( $i_lowest >= 0 ) # or no more space and we have a break
- && $too_long
- )
- );
+ # pop most recently appended character
+ my $top = chop($csc);
+
+ # push it back plus the mate to the newest character
+ # unless they balance each other.
+ $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
}
- #-------------------------------------------------------
- # END of inner loop to find the best next breakpoint
- # Now decide exactly where to put the breakpoint
- #-------------------------------------------------------
+ # return the balanced string
+ return $csc;
+ }
+} ## end closure balance_csc_text
- # it's always ok to break at imax if no other break was found
- if ( $i_lowest < 0 ) { $i_lowest = $imax }
+sub add_closing_side_comment {
- # semi-final index calculation
- my $i_next_nonblank = $inext_to_go[$i_lowest];
- my $next_nonblank_type = $types_to_go[$i_next_nonblank];
- my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+ my $self = shift;
+ my $rLL = $self->[_rLL_];
- #-------------------------------------------------------
- # ?/: rule 1 : if a break here will separate a '?' on this
- # line from its closing ':', then break at the '?' instead.
- #-------------------------------------------------------
- foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
- next unless ( $tokens_to_go[$i] eq '?' );
+ # add closing side comments after closing block braces if -csc used
+ my ( $closing_side_comment, $cscw_block_comment );
- # do not break if probable sequence of ?/: statements
- next if ($is_colon_chain);
+ #---------------------------------------------------------------
+ # Step 1: loop through all tokens of this line to accumulate
+ # the text needed to create the closing side comments. Also see
+ # how the line ends.
+ #---------------------------------------------------------------
- # do not break if statement is broken by side comment
- next
- if ( $tokens_to_go[$max_index_to_go] eq '#'
- && $self->terminal_type_i( 0, $max_index_to_go ) !~
- /^[\;\}]$/ );
+ my ( $terminal_type, $i_terminal, $i_block_leading_text,
+ $block_leading_text, $block_line_count, $block_label )
+ = $self->accumulate_csc_text();
- # no break needed if matching : is also on the line
- next
- if ( $mate_index_to_go[$i] >= 0
- && $mate_index_to_go[$i] <= $i_next_nonblank );
+ #---------------------------------------------------------------
+ # Step 2: make the closing side comment if this ends a block
+ #---------------------------------------------------------------
+ my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
- $i_lowest = $i;
- if ( $want_break_before{'?'} ) { $i_lowest-- }
- last;
- }
+ # if this line might end in a block closure..
+ if (
+ $terminal_type eq '}'
- #-------------------------------------------------------
- # END of inner loop to find the best next breakpoint:
- # Break the line after the token with index i=$i_lowest
- #-------------------------------------------------------
+ # ..and either
+ && (
- # final index calculation
- $i_next_nonblank = $inext_to_go[$i_lowest];
- $next_nonblank_type = $types_to_go[$i_next_nonblank];
- $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+ # the block is long enough
+ ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
- $DEBUG_BREAKPOINTS
- && print STDOUT
- "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
+ # or there is an existing comment to check
+ || ( $have_side_comment
+ && $rOpts->{'closing-side-comment-warnings'} )
+ )
- #-------------------------------------------------------
- # ?/: rule 2 : if we break at a '?', then break at its ':'
- #
- # Note: this rule is also in sub scan_list to handle a break
- # at the start and end of a line (in case breaks are dictated
- # by side comments).
- #-------------------------------------------------------
- if ( $next_nonblank_type eq '?' ) {
- $self->set_closing_breakpoint($i_next_nonblank);
- }
- elsif ( $types_to_go[$i_lowest] eq '?' ) {
- $self->set_closing_breakpoint($i_lowest);
- }
+ # .. and if this is one of the types of interest
+ && $block_type_to_go[$i_terminal] =~
+ /$closing_side_comment_list_pattern/
- #-------------------------------------------------------
- # ?/: rule 3 : if we break at a ':' then we save
- # its location for further work below. We may need to go
- # back and break at its '?'.
- #-------------------------------------------------------
- if ( $next_nonblank_type eq ':' ) {
- push @i_colon_breaks, $i_next_nonblank;
- }
- elsif ( $types_to_go[$i_lowest] eq ':' ) {
- push @i_colon_breaks, $i_lowest;
- }
+ # .. but not an anonymous sub
+ # These are not normally of interest, and their closing braces are
+ # often followed by commas or semicolons anyway. This also avoids
+ # possible erratic output due to line numbering inconsistencies
+ # in the cases where their closing braces terminate a line.
+ && $block_type_to_go[$i_terminal] ne 'sub'
- # here we should set breaks for all '?'/':' pairs which are
- # separated by this line
+ # ..and the corresponding opening brace must is not in this batch
+ # (because we do not need to tag one-line blocks, although this
+ # should also be caught with a positive -csci value)
+ && $self->mate_index_to_go($i_terminal) < 0
- $line_count++;
+ # ..and either
+ && (
- # save this line segment, after trimming blanks at the ends
- push( @i_first,
- ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
- push( @i_last,
- ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
+ # this is the last token (line doesn't have a side comment)
+ !$have_side_comment
- # set a forced breakpoint at a container opening, if necessary, to
- # signal a break at a closing container. Excepting '(' for now.
- if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
- && !$forced_breakpoint_to_go[$i_lowest] )
- {
- $self->set_closing_breakpoint($i_lowest);
- }
+ # or the old side comment is a closing side comment
+ || $tokens_to_go[$max_index_to_go] =~
+ /$closing_side_comment_prefix_pattern/
+ )
+ )
+ {
- # get ready to go again
- $i_begin = $i_lowest + 1;
- $last_break_strength = $lowest_strength;
- $i_last_break = $i_lowest;
- $leading_alignment_token = "";
- $leading_alignment_type = "";
- $lowest_next_token = '';
- $lowest_next_type = 'b';
+ # then make the closing side comment text
+ if ($block_label) { $block_label .= " " }
+ my $token =
+"$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
- if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
- $i_begin++;
+ # append any extra descriptive text collected above
+ if ( $i_block_leading_text == $i_terminal ) {
+ $token .= $block_leading_text;
}
- # update indentation size
- if ( $i_begin <= $imax ) {
- $leading_spaces = leading_spaces_to_go($i_begin);
- }
- }
+ $token = balance_csc_text($token)
+ if $rOpts->{'closing-side-comments-balanced'};
- #-------------------------------------------------------
- # END of main loop to set continuation breakpoints
- # Now go back and make any necessary corrections
- #-------------------------------------------------------
+ $token =~ s/\s*$//; # trim any trailing whitespace
- #-------------------------------------------------------
- # ?/: rule 4 -- if we broke at a ':', then break at
- # corresponding '?' unless this is a chain of ?: expressions
- #-------------------------------------------------------
- if (@i_colon_breaks) {
+ # handle case of existing closing side comment
+ if ($have_side_comment) {
- # using a simple method for deciding if we are in a ?/: chain --
- # this is a chain if it has multiple ?/: pairs all in order;
- # otherwise not.
- # Note that if line starts in a ':' we count that above as a break
- my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
+ # warn if requested and tokens differ significantly
+ if ( $rOpts->{'closing-side-comment-warnings'} ) {
+ my $old_csc = $tokens_to_go[$max_index_to_go];
+ my $new_csc = $token;
+ $new_csc =~ s/\s+//g; # trim all whitespace
+ $old_csc =~ s/\s+//g; # trim all whitespace
+ $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
+ $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
+ $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
+ my $new_trailing_dots = $1;
+ $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
- unless ($is_chain) {
- my @insert_list = ();
- foreach (@i_colon_breaks) {
- my $i_question = $mate_index_to_go[$_];
- if ( $i_question >= 0 ) {
- if ( $want_break_before{'?'} ) {
- $i_question = $iprev_to_go[$i_question];
- }
+ # Patch to handle multiple closing side comments at
+ # else and elsif's. These have become too complicated
+ # to check, so if we see an indication of
+ # '[ if' or '[ # elsif', then assume they were made
+ # by perltidy.
+ if ( $block_type_to_go[$i_terminal] eq 'else' ) {
+ if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
+ }
+ elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
+ if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
+ }
- if ( $i_question >= 0 ) {
- push @insert_list, $i_question;
- }
+ # if old comment is contained in new comment,
+ # only compare the common part.
+ if ( length($new_csc) > length($old_csc) ) {
+ $new_csc = substr( $new_csc, 0, length($old_csc) );
}
- $self->insert_additional_breaks( \@insert_list, \@i_first,
- \@i_last );
- }
- }
- }
- return ( \@i_first, \@i_last, $colon_count );
-}
-sub insert_additional_breaks {
+ # if the new comment is shorter and has been limited,
+ # only compare the common part.
+ if ( length($new_csc) < length($old_csc)
+ && $new_trailing_dots )
+ {
+ $old_csc = substr( $old_csc, 0, length($new_csc) );
+ }
- # this routine will add line breaks at requested locations after
- # sub set_continuation_breaks has made preliminary breaks.
+ # any remaining difference?
+ if ( $new_csc ne $old_csc ) {
- my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_;
- my $i_f;
- my $i_l;
- my $line_number = 0;
- foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
+ # just leave the old comment if we are below the threshold
+ # for creating side comments
+ if ( $block_line_count <
+ $rOpts->{'closing-side-comment-interval'} )
+ {
+ $token = undef;
+ }
- next if ( $nobreak_to_go[$i_break_left] );
+ # otherwise we'll make a note of it
+ else {
+
+ warning(
+"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
+ );
+
+ # save the old side comment in a new trailing block
+ # comment
+ my $timestamp = "";
+ if ( $rOpts->{'timestamp'} ) {
+ my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
+ $year += 1900;
+ $month += 1;
+ $timestamp = "$year-$month-$day";
+ }
+ $cscw_block_comment =
+"## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
+## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
+ }
+ }
+ else {
- $i_f = $ri_first->[$line_number];
- $i_l = $ri_last->[$line_number];
- while ( $i_break_left >= $i_l ) {
- $line_number++;
+ # No differences.. we can safely delete old comment if we
+ # are below the threshold
+ if ( $block_line_count <
+ $rOpts->{'closing-side-comment-interval'} )
+ {
+ $token = undef;
+ $self->unstore_token_to_go()
+ if ( $types_to_go[$max_index_to_go] eq '#' );
+ $self->unstore_token_to_go()
+ if ( $types_to_go[$max_index_to_go] eq 'b' );
+ }
+ }
+ }
- # shouldn't happen unless caller passes bad indexes
- if ( $line_number >= @{$ri_last} ) {
- warning(
-"Non-fatal program bug: couldn't set break at $i_break_left\n"
- );
- report_definite_bug();
- return;
+ # switch to the new csc (unless we deleted it!)
+ if ($token) {
+ $tokens_to_go[$max_index_to_go] = $token;
+ my $K = $K_to_go[$max_index_to_go];
+ $rLL->[$K]->[_TOKEN_] = $token;
+ $rLL->[$K]->[_TOKEN_LENGTH_] =
+ length($token); # NOTE: length no longer important
}
- $i_f = $ri_first->[$line_number];
- $i_l = $ri_last->[$line_number];
}
- # Do not leave a blank at the end of a line; back up if necessary
- if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
+ # handle case of NO existing closing side comment
+ else {
- my $i_break_right = $inext_to_go[$i_break_left];
- if ( $i_break_left >= $i_f
- && $i_break_left < $i_l
- && $i_break_right > $i_f
- && $i_break_right <= $i_l )
- {
- splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
- splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
+ # To avoid inserting a new token in the token arrays, we
+ # will just return the new side comment so that it can be
+ # inserted just before it is needed in the call to the
+ # vertical aligner.
+ $closing_side_comment = $token;
}
}
- return;
+ return ( $closing_side_comment, $cscw_block_comment );
}
-{ ## begin closure set_closing_breakpoint
-
- my %postponed_breakpoint;
+############################
+# CODE SECTION 15: Summarize
+############################
- sub initialize_postponed_breakpoint {
- %postponed_breakpoint = ();
- return;
- }
+sub wrapup {
- sub has_postponed_breakpoint {
- my ($seqno) = @_;
- return $postponed_breakpoint{$seqno};
- }
+ # This is the last routine called when a file is formatted.
+ # Flush buffer and write any informative messages
+ my $self = shift;
- sub set_closing_breakpoint {
+ $self->flush();
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->decrement_output_line_number()
+ ; # fix up line number since it was incremented
+ we_are_at_the_last_line();
+ my $added_semicolon_count = $self->[_added_semicolon_count_];
+ my $first_added_semicolon_at = $self->[_first_added_semicolon_at_];
+ my $last_added_semicolon_at = $self->[_last_added_semicolon_at_];
- # set a breakpoint at a matching closing token
- # at present, this is only used to break at a ':' which matches a '?'
- my ( $self, $i_break ) = @_;
+ if ( $added_semicolon_count > 0 ) {
+ my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
+ my $what =
+ ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
+ write_logfile_entry("$added_semicolon_count $what added:\n");
+ write_logfile_entry(
+ " $first at input line $first_added_semicolon_at\n");
- if ( $mate_index_to_go[$i_break] >= 0 ) {
+ if ( $added_semicolon_count > 1 ) {
+ write_logfile_entry(
+ " Last at input line $last_added_semicolon_at\n");
+ }
+ write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
+ write_logfile_entry("\n");
+ }
- # CAUTION: infinite recursion possible here:
- # set_closing_breakpoint calls set_forced_breakpoint, and
- # set_forced_breakpoint call set_closing_breakpoint
- # ( test files attrib.t, BasicLyx.pm.html).
- # Don't reduce the '2' in the statement below
- if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
+ my $deleted_semicolon_count = $self->[_deleted_semicolon_count_];
+ my $first_deleted_semicolon_at = $self->[_first_deleted_semicolon_at_];
+ my $last_deleted_semicolon_at = $self->[_last_deleted_semicolon_at_];
+ if ( $deleted_semicolon_count > 0 ) {
+ my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
+ my $what =
+ ( $deleted_semicolon_count > 1 )
+ ? "semicolons were"
+ : "semicolon was";
+ write_logfile_entry(
+ "$deleted_semicolon_count unnecessary $what deleted:\n");
+ write_logfile_entry(
+ " $first at input line $first_deleted_semicolon_at\n");
- # break before } ] and ), but sub set_forced_breakpoint will decide
- # to break before or after a ? and :
- my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
- $self->set_forced_breakpoint(
- $mate_index_to_go[$i_break] - $inc );
- }
- }
- else {
- my $type_sequence = $type_sequence_to_go[$i_break];
- if ($type_sequence) {
- my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
- $postponed_breakpoint{$type_sequence} = 1;
- }
+ if ( $deleted_semicolon_count > 1 ) {
+ write_logfile_entry(
+ " Last at input line $last_deleted_semicolon_at\n");
}
- return;
+ write_logfile_entry(" (Use -ndsm to prevent semicolon deletion)\n");
+ write_logfile_entry("\n");
}
-} ## end closure set_closing_breakpoint
-
-sub compare_indentation_levels {
- # Check to see if output line tabbing agrees with input line
- # this can be very useful for debugging a script which has an extra
- # or missing brace.
+ my $embedded_tab_count = $self->[_embedded_tab_count_];
+ my $first_embedded_tab_at = $self->[_first_embedded_tab_at_];
+ my $last_embedded_tab_at = $self->[_last_embedded_tab_at_];
+ if ( $embedded_tab_count > 0 ) {
+ my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
+ my $what =
+ ( $embedded_tab_count > 1 )
+ ? "quotes or patterns"
+ : "quote or pattern";
+ write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
+ write_logfile_entry(
+"This means the display of this script could vary with device or software\n"
+ );
+ write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
- my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_;
- return unless ( defined($K_first) );
+ if ( $embedded_tab_count > 1 ) {
+ write_logfile_entry(
+ " Last at input line $last_embedded_tab_at\n");
+ }
+ write_logfile_entry("\n");
+ }
- my $rLL = $self->[_rLL_];
+ my $first_tabbing_disagreement = $self->[_first_tabbing_disagreement_];
+ my $last_tabbing_disagreement = $self->[_last_tabbing_disagreement_];
+ my $tabbing_disagreement_count = $self->[_tabbing_disagreement_count_];
+ my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
- my $structural_indentation_level = $rLL->[$K_first]->[_LEVEL_];
- my $radjusted_levels = $self->[_radjusted_levels_];
- if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
- $structural_indentation_level = $radjusted_levels->[$K_first];
+ if ($first_tabbing_disagreement) {
+ write_logfile_entry(
+"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
+ );
}
- my $is_closing_block = $rLL->[$K_first]->[_TYPE_] eq '}'
- && $rLL->[$K_first]->[_BLOCK_TYPE_];
+ my $first_btd = $self->[_first_brace_tabbing_disagreement_];
+ if ($first_btd) {
+ my $msg =
+"First closing brace indentation disagreement started at input line $first_btd\n";
+ write_logfile_entry($msg);
- if ( $guessed_indentation_level ne $structural_indentation_level ) {
- $self->[_last_tabbing_disagreement_] = $line_number;
+ # leave a hint in the .ERR file if there was a brace error
+ if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
+ }
- if ($is_closing_block) {
+ my $in_btd = $self->[_in_brace_tabbing_disagreement_];
+ if ($in_btd) {
+ my $msg =
+"Ending with brace indentation disagreement which started at input line $in_btd\n";
+ write_logfile_entry($msg);
- if ( !$self->[_in_brace_tabbing_disagreement_] ) {
- $self->[_in_brace_tabbing_disagreement_] = $line_number;
- }
- if ( !$self->[_first_brace_tabbing_disagreement_] ) {
- $self->[_first_brace_tabbing_disagreement_] = $line_number;
- }
+ # leave a hint in the .ERR file if there was a brace error
+ if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
+ }
- }
+ if ($in_tabbing_disagreement) {
+ my $msg =
+"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n";
+ write_logfile_entry($msg);
+ }
+ else {
- if ( !$self->[_in_tabbing_disagreement_] ) {
- $self->[_tabbing_disagreement_count_]++;
+ if ($last_tabbing_disagreement) {
- if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
- write_logfile_entry(
-"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
- );
- }
- $self->[_in_tabbing_disagreement_] = $line_number;
- $self->[_first_tabbing_disagreement_] = $line_number
- unless ( $self->[_first_tabbing_disagreement_] );
+ write_logfile_entry(
+"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
+ );
+ }
+ else {
+ write_logfile_entry("No indentation disagreement seen\n");
}
}
- else {
-
- $self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block);
- my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
- if ($in_tabbing_disagreement) {
+ if ($first_tabbing_disagreement) {
+ write_logfile_entry(
+"Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
+ );
+ }
+ write_logfile_entry("\n");
- if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
- write_logfile_entry(
-"End indentation disagreement from input line $in_tabbing_disagreement\n"
- );
+ my $vao = $self->[_vertical_aligner_object_];
+ $vao->report_anything_unusual();
- if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES )
- {
- write_logfile_entry(
- "No further tabbing disagreements will be noted\n");
- }
- }
- $self->[_in_tabbing_disagreement_] = 0;
+ $file_writer_object->report_line_length_errors();
- }
- }
return;
}