From: Steve Hancock <perltidy@users.sourceforge.net>
Date: Mon, 17 Jul 2023 22:05:52 +0000 (-0700)
Subject: simplify code for line-endings
X-Git-Tag: 20230701.01~1
X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=9d78036dd72b0cc27451cb507a4ef55caef00ebc;p=perltidy.git

simplify code for line-endings
---

diff --git a/lib/Perl/Tidy.pm b/lib/Perl/Tidy.pm
index 76f54808..615ff84f 100644
--- a/lib/Perl/Tidy.pm
+++ b/lib/Perl/Tidy.pm
@@ -273,41 +273,36 @@ EOM
 
 sub find_input_line_ending {
 
-    # Peek at a file and return first line ending character.
-    # Return undefined value in case of any trouble.
-    my ($input_file) = @_;
-    my $ending;
+    # Given:
+    #   $buf = raw first line of input file
+    # Return:
+    #   the first line ending sequence ("\015\012", "\015", or "\012"), or
+    #   an undefined value if none is seen or it is not recognized.
+    my ($buf) = @_;
 
-    # silently ignore input from object or stdin
-    if ( ref($input_file) || $input_file eq '-' ) {
-        return $ending;
-    }
+    my $ending;
+    if ($buf) {
 
-    my $fh;
-    open( $fh, '<', $input_file ) || return $ending;
+        if ( $buf =~ /([\012\015]+)/ ) {
+            my $test = $1;
 
-    binmode $fh;
-    my $buf;
-    read( $fh, $buf, 1024 );
-    close $fh || return $ending;
-    if ( $buf && $buf =~ /([\012\015]+)/ ) {
-        my $test = $1;
+            # dos
+            if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
 
-        # dos
-        if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
+            # mac
+            elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
 
-        # mac
-        elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
+            # unix
+            elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
 
-        # unix
-        elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
+            # unknown
+            else { }
+        }
 
-        # unknown
+        # no ending seen
         else { }
-    }
 
-    # no ending seen
-    else { }
+    }
 
     return $ending;
 } ## end sub find_input_line_ending
@@ -1202,15 +1197,6 @@ sub backup_method_copy {
         }
     }
 
-    # or in a SCALAR ref (less efficient, and only used for testing)
-    elsif ( ref($output_file) eq 'SCALAR' ) {
-        foreach my $line ( split /^/, ${$output_file} ) {
-            $fout->print($line)
-              or
-              Die("cannot print to '$input_file' with -b option: $OS_ERROR\n");
-        }
-    }
-
     # Error if anything else ...
     # This can only happen if the output was changed from \@tmp_buff
     else {
@@ -1498,11 +1484,13 @@ sub get_decoded_string_buffer {
 
     # Decode the input buffer if necessary or requested
 
-    # Given
+    # Given:
     #   $input_file   = the input file or stream
     #   $display_name = its name to use in error messages
 
-    # Return
+    # Set $self->[_line_separator_], and
+
+    # Return:
     #   $buf = string buffer with input, decoded from utf8 if necessary
     #   $is_encoded_data  = true if $buf is decoded from utf8
     #   $decoded_input_as = true if perltidy decoded input buf
@@ -1518,13 +1506,26 @@ sub get_decoded_string_buffer {
     # return nothing if error
     return unless ($fh);
 
-    my $buf   = EMPTY_STRING;
-    my $count = 0;
+    my $buf            = EMPTY_STRING;
+    my $line_separator = $self->[_line_separator_default_];
+    my $count          = 0;
+
     while ( my $line = $fh->getline() ) {
         $buf .= $line;
+
+        # Find and change the line separator if requested with -ple
+        if ( !$count && $rOpts->{'preserve-line-endings'} ) {
+
+            # Limit string length in case we have a strange file
+            my $line1_raw = substr( $line, 0, 1024 );
+            my $ls_input  = find_input_line_ending($line1_raw);
+            if ( defined($ls_input) ) { $line_separator = $ls_input }
+        }
         $count++;
     }
 
+    $self->[_line_separator_] = $line_separator;
+
     # patch to read raw mac files under unix, dos
     # look for a single line with embedded \r's
     if ( $count == 1 && $buf =~ /[\015][^\015\012]/ ) {
@@ -1534,6 +1535,9 @@ sub get_decoded_string_buffer {
         }
     }
 
+    if ( $rOpts->{'preserve-line-endings'} ) {
+    }
+
     my $encoding_in              = EMPTY_STRING;
     my $rOpts_character_encoding = $rOpts->{'character-encoding'};
     my $encoding_log_message;
@@ -1737,10 +1741,9 @@ sub process_all_files {
     #   process_iteration_layer - handle any iterations on formatting
     #   process_single_case     - solves one formatting problem
 
-    my $rOpts                  = $self->[_rOpts_];
-    my $dot                    = $self->[_file_extension_separator_];
-    my $diagnostics_object     = $self->[_diagnostics_object_];
-    my $line_separator_default = $self->[_line_separator_default_];
+    my $rOpts              = $self->[_rOpts_];
+    my $dot                = $self->[_file_extension_separator_];
+    my $diagnostics_object = $self->[_diagnostics_object_];
 
     my $destination_stream = $rinput_hash->{'destination'};
     my $errorfile_stream   = $rinput_hash->{'errorfile'};
@@ -1998,9 +2001,8 @@ EOM
         else {
             if ($in_place_modify) {
 
-                # Send output to a temporary array buffer. This will
-                # allow efficient copying back to the input by
-                # sub backup_and_modify_in_place, below.
+                # Send output to a temporary array buffer. This is
+                # required by sub backup_and_modify_in_place, below.
                 my @tmp_buff;
                 $output_file = \@tmp_buff;
                 $output_name = $display_name;
@@ -2047,13 +2049,6 @@ EOM
             $logger_object->complain( ${$rpending_complaint} );
         }
 
-        # Use input line endings if requested
-        my $line_separator = $line_separator_default;
-        if ( $rOpts->{'preserve-line-endings'} ) {
-            my $ls_input = find_input_line_ending($input_file);
-            if ( defined($ls_input) ) { $line_separator = $ls_input }
-        }
-
         # additional parameters needed by lower level routines
         $self->[_actual_output_extension_] = $actual_output_extension;
         $self->[_debugfile_stream_]        = $debugfile_stream;
@@ -2063,7 +2058,6 @@ EOM
         $self->[_fileroot_]                = $fileroot;
         $self->[_is_encoded_data_]         = $is_encoded_data;
         $self->[_length_function_]         = $length_function;
-        $self->[_line_separator_]          = $line_separator;
         $self->[_logger_object_]           = $logger_object;
         $self->[_output_file_]             = $output_file;
         $self->[_teefile_stream_]          = $teefile_stream;
@@ -2171,7 +2165,8 @@ sub process_filter_layer {
     my $prefilter          = $self->[_prefilter_];
     my $postfilter         = $self->[_postfilter_];
     my $decoded_input_as   = $self->[_decoded_input_as_];
-    my $line_separator     = $self->[_line_separator_];
+
+    my $change_line_separator = $self->[_line_separator_] ne "\n";
 
     my $remove_terminal_newline =
       !$rOpts->{'add-terminal-newline'} && substr( $buf, -1, 1 ) !~ /\n/;
@@ -2218,6 +2213,7 @@ sub process_filter_layer {
         # These are used below, just after iterations are made.
         $use_postfilter_buffer =
              $postfilter
+          || $change_line_separator
           || $remove_terminal_newline
           || $rOpts->{'assert-tidy'}
           || $rOpts->{'assert-untidy'}
@@ -2269,7 +2265,7 @@ EOM
             output_file => $use_postfilter_buffer
             ? \$postfilter_buffer
             : $output_file,
-            line_separator  => $line_separator,
+            line_separator  => "\n",
             is_encoded_data => $is_encoded_data,
         );
     }
@@ -2294,9 +2290,13 @@ EOM
     #--------------------------------
     if ($use_postfilter_buffer) {
 
+        # Note that here we must pass the ultimate $self->[_line_separator_]
+        # so that any non-native line separator will be applied in the final
+        # 'print' step below.  The native line separator is used in all
+        # intermediate iterations and filter steps so string operations work.
         my $sink_object_post = Perl::Tidy::LineSink->new(
             output_file     => $output_file,
-            line_separator  => $line_separator,
+            line_separator  => $self->[_line_separator_],
             is_encoded_data => $is_encoded_data,
         );
 
@@ -2413,7 +2413,6 @@ sub process_iteration_layer {
     my $fileroot           = $self->[_fileroot_];
     my $is_encoded_data    = $self->[_is_encoded_data_];
     my $length_function    = $self->[_length_function_];
-    my $line_separator     = $self->[_line_separator_];
     my $logger_object      = $self->[_logger_object_];
     my $rOpts              = $self->[_rOpts_];
     my $tabsize            = $self->[_tabsize_];
@@ -2494,7 +2493,7 @@ sub process_iteration_layer {
         if ( $iter < $max_iterations ) {
             $sink_object = Perl::Tidy::LineSink->new(
                 output_file     => \$sink_buffer,
-                line_separator  => $line_separator,
+                line_separator  => "\n",
                 is_encoded_data => $is_encoded_data,
             );
         }