From: Steve Hancock Date: Mon, 17 Jul 2023 22:05:52 +0000 (-0700) Subject: simplify code for line-endings X-Git-Tag: 20230701.01~1 X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=9d78036dd72b0cc27451cb507a4ef55caef00ebc;p=perltidy.git simplify code for line-endings --- diff --git a/lib/Perl/Tidy.pm b/lib/Perl/Tidy.pm index 76f54808..615ff84f 100644 --- a/lib/Perl/Tidy.pm +++ b/lib/Perl/Tidy.pm @@ -273,41 +273,36 @@ EOM sub find_input_line_ending { - # Peek at a file and return first line ending character. - # Return undefined value in case of any trouble. - my ($input_file) = @_; - my $ending; + # Given: + # $buf = raw first line of input file + # Return + # first line ending character. + # undefined value in case of any trouble. + my ($buf) = @_; - # silently ignore input from object or stdin - if ( ref($input_file) || $input_file eq '-' ) { - return $ending; - } + my $ending; + if ($buf) { - my $fh; - open( $fh, '<', $input_file ) || return $ending; + if ( $buf =~ /([\012\015]+)/ ) { + my $test = $1; - binmode $fh; - my $buf; - read( $fh, $buf, 1024 ); - close $fh || return $ending; - if ( $buf && $buf =~ /([\012\015]+)/ ) { - my $test = $1; + # dos + if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" } - # dos - if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" } + # mac + elsif ( $test =~ /^\015+$/ ) { $ending = "\015" } - # mac - elsif ( $test =~ /^\015+$/ ) { $ending = "\015" } + # unix + elsif ( $test =~ /^\012+$/ ) { $ending = "\012" } - # unix - elsif ( $test =~ /^\012+$/ ) { $ending = "\012" } + # unknown + else { } + } - # unknown + # no ending seen else { } - } - # no ending seen - else { } + } return $ending; } ## end sub find_input_line_ending @@ -1202,15 +1197,6 @@ sub backup_method_copy { } } - # or in a SCALAR ref (less efficient, and only used for testing) - elsif ( ref($output_file) eq 'SCALAR' ) { - foreach my $line ( split /^/, ${$output_file} ) { - $fout->print($line) - or - Die("cannot print to '$input_file' with -b option: $OS_ERROR\n"); - } - } - # Error if anything else ... # This can only happen if the output was changed from \@tmp_buff else { @@ -1498,11 +1484,13 @@ sub get_decoded_string_buffer { # Decode the input buffer if necessary or requested - # Given + # Given: # $input_file = the input file or stream # $display_name = its name to use in error messages - # Return + # Set $self->[_line_separator_], and + + # Return: # $buf = string buffer with input, decoded from utf8 if necessary # $is_encoded_data = true if $buf is decoded from utf8 # $decoded_input_as = true if perltidy decoded input buf @@ -1518,13 +1506,26 @@ sub get_decoded_string_buffer { # return nothing if error return unless ($fh); - my $buf = EMPTY_STRING; - my $count = 0; + my $buf = EMPTY_STRING; + my $line_separator = $self->[_line_separator_default_]; + my $count = 0; + while ( my $line = $fh->getline() ) { $buf .= $line; + + # Find and change the line separator if requested with -ple + if ( !$count && $rOpts->{'preserve-line-endings'} ) { + + # Limit string length in case we have a strange file + my $line1_raw = substr( $line, 0, 1024 ); + my $ls_input = find_input_line_ending($line1_raw); + if ( defined($ls_input) ) { $line_separator = $ls_input } + } $count++; } + $self->[_line_separator_] = $line_separator; + # patch to read raw mac files under unix, dos # look for a single line with embedded \r's if ( $count == 1 && $buf =~ /[\015][^\015\012]/ ) { @@ -1534,6 +1535,9 @@ sub get_decoded_string_buffer { } } + if ( $rOpts->{'preserve-line-endings'} ) { + } + my $encoding_in = EMPTY_STRING; my $rOpts_character_encoding = $rOpts->{'character-encoding'}; my $encoding_log_message; @@ -1737,10 +1741,9 @@ sub process_all_files { # process_iteration_layer - handle any iterations on formatting # process_single_case - solves one formatting problem - my $rOpts = $self->[_rOpts_]; - my $dot = $self->[_file_extension_separator_]; - my $diagnostics_object = $self->[_diagnostics_object_]; - my $line_separator_default = $self->[_line_separator_default_]; + my $rOpts = $self->[_rOpts_]; + my $dot = $self->[_file_extension_separator_]; + my $diagnostics_object = $self->[_diagnostics_object_]; my $destination_stream = $rinput_hash->{'destination'}; my $errorfile_stream = $rinput_hash->{'errorfile'}; @@ -1998,9 +2001,8 @@ EOM else { if ($in_place_modify) { - # Send output to a temporary array buffer. This will - # allow efficient copying back to the input by - # sub backup_and_modify_in_place, below. + # Send output to a temporary array buffer. This is + # required by sub backup_and_modify_in_place, below. my @tmp_buff; $output_file = \@tmp_buff; $output_name = $display_name; @@ -2047,13 +2049,6 @@ EOM $logger_object->complain( ${$rpending_complaint} ); } - # Use input line endings if requested - my $line_separator = $line_separator_default; - if ( $rOpts->{'preserve-line-endings'} ) { - my $ls_input = find_input_line_ending($input_file); - if ( defined($ls_input) ) { $line_separator = $ls_input } - } - # additional parameters needed by lower level routines $self->[_actual_output_extension_] = $actual_output_extension; $self->[_debugfile_stream_] = $debugfile_stream; @@ -2063,7 +2058,6 @@ EOM $self->[_fileroot_] = $fileroot; $self->[_is_encoded_data_] = $is_encoded_data; $self->[_length_function_] = $length_function; - $self->[_line_separator_] = $line_separator; $self->[_logger_object_] = $logger_object; $self->[_output_file_] = $output_file; $self->[_teefile_stream_] = $teefile_stream; @@ -2171,7 +2165,8 @@ sub process_filter_layer { my $prefilter = $self->[_prefilter_]; my $postfilter = $self->[_postfilter_]; my $decoded_input_as = $self->[_decoded_input_as_]; - my $line_separator = $self->[_line_separator_]; + + my $change_line_separator = $self->[_line_separator_] ne "\n"; my $remove_terminal_newline = !$rOpts->{'add-terminal-newline'} && substr( $buf, -1, 1 ) !~ /\n/; @@ -2218,6 +2213,7 @@ sub process_filter_layer { # These are used below, just after iterations are made. $use_postfilter_buffer = $postfilter + || $change_line_separator || $remove_terminal_newline || $rOpts->{'assert-tidy'} || $rOpts->{'assert-untidy'} @@ -2269,7 +2265,7 @@ EOM output_file => $use_postfilter_buffer ? \$postfilter_buffer : $output_file, - line_separator => $line_separator, + line_separator => "\n", is_encoded_data => $is_encoded_data, ); } @@ -2294,9 +2290,13 @@ EOM #-------------------------------- if ($use_postfilter_buffer) { + # Note that here we must pass the ultimate '$line_separator' so that + # any non-native $line_separator will be applied in the final 'print' + # step below. The native line separator is used in all intermediate + # iterations and filter operations so that string operations work ok. my $sink_object_post = Perl::Tidy::LineSink->new( output_file => $output_file, - line_separator => $line_separator, + line_separator => $self->[_line_separator_], is_encoded_data => $is_encoded_data, ); @@ -2413,7 +2413,6 @@ sub process_iteration_layer { my $fileroot = $self->[_fileroot_]; my $is_encoded_data = $self->[_is_encoded_data_]; my $length_function = $self->[_length_function_]; - my $line_separator = $self->[_line_separator_]; my $logger_object = $self->[_logger_object_]; my $rOpts = $self->[_rOpts_]; my $tabsize = $self->[_tabsize_]; @@ -2494,7 +2493,7 @@ sub process_iteration_layer { if ( $iter < $max_iterations ) { $sink_object = Perl::Tidy::LineSink->new( output_file => \$sink_buffer, - line_separator => $line_separator, + line_separator => "\n", is_encoded_data => $is_encoded_data, ); }