From 95a7e298ef7c772ad282f847eeba443d0790225d Mon Sep 17 00:00:00 2001
From: Steve Hancock <perltidy@users.sourceforge.net>
Date: Mon, 20 Jun 2022 19:01:58 -0700
Subject: [PATCH] simplify structure

---
 lib/Perl/Tidy.pm            | 263 ++++++++++++++++++++----------------
 lib/Perl/Tidy/LineSink.pm   |  25 +---
 lib/Perl/Tidy/LineSource.pm |  13 --
 3 files changed, 150 insertions(+), 151 deletions(-)

diff --git a/lib/Perl/Tidy.pm b/lib/Perl/Tidy.pm
index a90d90a6..8de23917 100644
--- a/lib/Perl/Tidy.pm
+++ b/lib/Perl/Tidy.pm
@@ -846,18 +846,6 @@ EOM
     my ( $in_place_modify, $backup_extension, $delete_backup ) =
       $self->check_in_place_modify( $source_stream, $destination_stream );
 
-    # Turn off assert-tidy and assert-untidy unless we are tidying files
-    if ( $rOpts->{'format'} ne 'tidy' ) {
-        if ( $rOpts->{'assert-tidy'} ) {
-            $rOpts->{'assert-tidy'} = 0;
-            Warn("ignoring --assert-tidy, --format is not 'tidy'\n");
-        }
-        if ( $rOpts->{'assert-untidy'} ) {
-            $rOpts->{'assert-untidy'} = 0;
-            Warn("ignoring --assert-untidy, --format is not 'tidy'\n");
-        }
-    }
-
     Perl::Tidy::Formatter::check_options($rOpts);
     Perl::Tidy::Tokenizer::check_options($rOpts);
     Perl::Tidy::VerticalAligner::check_options($rOpts);
@@ -1822,6 +1810,15 @@ sub process_filter_layer {
     #         -> ( optional destination_buffer for encoding )
     #           -> final sink_object
 
+    # What is done based on format type:
+    #  utf8 decoding is done for all format types
+    #  prefiltering is applied to all format types
+    #   - because it may be needed to get through the tokenizer
+    #  postfiltering is only done for format='tidy'
+    #   - because it might cause problems when operating on html text
+    #  encoding of decoded output is only done for format='tidy'
+    #   - because html does its own encoding; user formatter does what it wants
+
     my $rOpts              = $self->[_rOpts_];
     my $is_encoded_data    = $self->[_is_encoded_data_];
     my $logger_object      = $self->[_logger_object_];
@@ -1833,118 +1830,133 @@ sub process_filter_layer {
     my $decoded_input_as   = $self->[_decoded_input_as_];
     my $line_separator     = $self->[_line_separator_];
 
-    # evaluate MD5 sum of input file for assert tests before any prefilter
-    my $digest_input = 0;
-    my $saved_input_buf;
-    if ( $rOpts->{'assert-tidy'} || $rOpts->{'assert-untidy'} ) {
-        $digest_input    = $md5_hex->($buf);
-        $saved_input_buf = $buf;
-    }
-
     my $remove_terminal_newline =
       !$rOpts->{'add-terminal-newline'} && substr( $buf, -1, 1 ) !~ /\n/;
 
-    # Create a 'sink_object' which knows how to write the output file
-    my ( $sink_object, $postfilter_buffer );
-
-    #-----------------------
-    # Setup prefilter buffer
-    #-----------------------
-    # If we need access to the output for filtering or checking assertions
-    # before writing to its ultimate destination, then we will send it
-    # to a temporary buffer. The variables are:
-    #  $postfilter_buffer     = the buffer to capture the output
-    #  $use_postfilter_buffer = is a postfilter buffer used?
-    # These are used below, just after iterations are made.
-    my $use_postfilter_buffer =
-         $postfilter
-      || $remove_terminal_newline
-      || $rOpts->{'assert-tidy'}
-      || $rOpts->{'assert-untidy'};
-
-    #-------------------------
-    # Setup destination_buffer
-    #-------------------------
-    # If the final output destination is not a file, then we might need to
-    # encode the result at the end of processing.  So in this case we will send
-    # the output to a temporary buffer.
-    # The key variables are:
-    #   $destination_buffer        - this will receive the formatted output
-    #   $use_destination_buffer    - is $destination_buffer used?
-    #   $encode_destination_buffer - is $destination_buffer to be encoded?
-    # These are used by sub 'copy_buffer_to_destination', below
+    # vars for postfilter, if used
+    my $use_postfilter_buffer;
+    my $postfilter_buffer;
 
+    # vars for destination buffer, if used
     my $destination_buffer;
     my $use_destination_buffer;
     my $encode_destination_buffer;
 
+    # vars for iterations, if done
+    my $sink_object;
+
+    # vars for checking assertions, if needed
+    my $digest_input = 0;
+    my $saved_input_buf;
+
     my $ref_destination_stream = ref($destination_stream);
-    if ( $ref_destination_stream && !$user_formatter ) {
-        $use_destination_buffer = 1;
-        $output_file            = \$destination_buffer;
-        $self->[_output_file_]  = $output_file;
-
-        # Strings and arrays use special encoding rules
-        if (   $ref_destination_stream eq 'SCALAR'
-            || $ref_destination_stream eq 'ARRAY' )
-        {
-            $encode_destination_buffer =
-              $rOpts->{'encode-output-strings'} && $decoded_input_as;
-        }
 
-        # An object with a print method will use file encoding rules
-        elsif ( $ref_destination_stream->can('print') ) {
-            $encode_destination_buffer = $is_encoded_data;
-        }
-        else {
-            confess <<EOM;
+    # Set up vars for postfilter, destination buffer, assertions and sink
+    # object if needed.  These are only used for 'tidy' formatting.
+    if ( $rOpts->{'format'} eq 'tidy' ) {
+
+        # evaluate MD5 sum of input file for assert tests before any prefilter
+        if ( $rOpts->{'assert-tidy'} || $rOpts->{'assert-untidy'} ) {
+            $digest_input    = $md5_hex->($buf);
+            $saved_input_buf = $buf;
+        }
+
+        #-----------------------
+        # Setup postfilter buffer
+        #-----------------------
+        # If we need access to the output for filtering or checking assertions
+        # before writing to its ultimate destination, then we will send it
+        # to a temporary buffer. The variables are:
+        #  $postfilter_buffer     = the buffer to capture the output
+        #  $use_postfilter_buffer = is a postfilter buffer used?
+        # These are used below, just after iterations are made.
+        $use_postfilter_buffer =
+             $postfilter
+          || $remove_terminal_newline
+          || $rOpts->{'assert-tidy'}
+          || $rOpts->{'assert-untidy'};
+
+        #-------------------------
+        # Setup destination_buffer
+        #-------------------------
+        # If the final output destination is not a file, then we might need to
+        # encode the result at the end of processing.  So in this case we will
+        # send the output to a temporary buffer.
+        # The key variables are:
+        #   $destination_buffer        - receives the formatted output
+        #   $use_destination_buffer    - is $destination_buffer used?
+        #   $encode_destination_buffer - encode $destination_buffer?
+        # These are used by sub 'copy_buffer_to_destination', below
+
+        if ($ref_destination_stream) {
+            $use_destination_buffer = 1;
+            $output_file            = \$destination_buffer;
+            $self->[_output_file_]  = $output_file;
+
+            # Strings and arrays use special encoding rules
+            if (   $ref_destination_stream eq 'SCALAR'
+                || $ref_destination_stream eq 'ARRAY' )
+            {
+                $encode_destination_buffer =
+                  $rOpts->{'encode-output-strings'} && $decoded_input_as;
+            }
+
+            # An object with a print method will use file encoding rules
+            elsif ( $ref_destination_stream->can('print') ) {
+                $encode_destination_buffer = $is_encoded_data;
+            }
+            else {
+                confess <<EOM;
 ------------------------------------------------------------------------
 No 'print' method is defined for object of class '$ref_destination_stream'
 Please check your call to Perl::Tidy::perltidy. Trace follows.
 ------------------------------------------------------------------------
 EOM
+            }
         }
-    }
 
-    #-------------------------------------------
-    # Make a sink object for the iteration phase
-    #-------------------------------------------
-    $sink_object = Perl::Tidy::LineSink->new(
-        output_file => $use_postfilter_buffer
-        ? \$postfilter_buffer
-        : $output_file,
-        line_separator  => $line_separator,
-        rOpts           => $rOpts,
-        is_encoded_data => $is_encoded_data,
-    );
+        #-------------------------------------------
+        # Make a sink object for the iteration phase
+        #-------------------------------------------
+        $sink_object = Perl::Tidy::LineSink->new(
+            output_file => $use_postfilter_buffer
+            ? \$postfilter_buffer
+            : $output_file,
+            line_separator  => $line_separator,
+            is_encoded_data => $is_encoded_data,
+        );
+    }
 
-    #-------------------------------------------------------
-    # Apply any prefilter. The prefilter is a code reference
-    # that will be applied to the source before tidying.
-    #-------------------------------------------------------
+    #-----------------------------------------------------------------------
+    # Apply any prefilter. The prefilter is a code reference that will be
+    # applied to the source before tokenizing.  Note that we are doing this
+    # for all format types ('tidy', 'html', 'user') because it may be needed
+    # to avoid tokenization errors.
+    #-----------------------------------------------------------------------
     $buf = $prefilter->($buf) if $prefilter;
 
-    #----------------------------------------------------------
+    #----------------------------------------------------------------------
     # Format contents of string '$buf', iterating if requested.
-    # Formatted result will be written to '$sink_object'
-    #----------------------------------------------------------
+    # For 'tidy', formatted result will be written to '$sink_object'
+    # For 'html' and 'user', result goes directly to its ultimate destination.
+    #----------------------------------------------------------------------
     $self->process_iteration_layer( $buf, $sink_object );
-    $sink_object->close_output_file() if $sink_object;
 
     #--------------------------------
     # Do postfilter buffer processing
     #--------------------------------
     if ($use_postfilter_buffer) {
 
-        $sink_object = Perl::Tidy::LineSink->new(
+        my $sink_object_post = Perl::Tidy::LineSink->new(
             output_file     => $output_file,
             line_separator  => $line_separator,
-            rOpts           => $rOpts,
             is_encoded_data => $is_encoded_data,
         );
 
+        #----------------------------------------------------------------------
         # Apply any postfilter. The postfilter is a code reference that will be
         # applied to the source after tidying.
+        #----------------------------------------------------------------------
         my $buf_post =
             $postfilter
           ? $postfilter->($postfilter_buffer)
@@ -1965,6 +1977,7 @@ EOM
                 $logger_object->resume_logfile();
             }
         }
+
         if ( $rOpts->{'assert-untidy'} ) {
             my $digest_output = $md5_hex->($buf_post);
             if ( $digest_output eq $digest_input ) {
@@ -1982,7 +1995,7 @@ EOM
         # Copy the filtered buffer to the final destination
         if ( !$remove_terminal_newline ) {
             while ( my $line = $source_object->get_line() ) {
-                $sink_object->write_line($line);
+                $sink_object_post->write_line($line);
             }
         }
         else {
@@ -1991,16 +2004,16 @@ EOM
             # final line
             my $line;
             while ( my $next_line = $source_object->get_line() ) {
-                $sink_object->write_line($line) if ($line);
+                $sink_object_post->write_line($line) if ($line);
                 $line = $next_line;
             }
             if ($line) {
-                $sink_object->set_line_separator(undef);
+                $sink_object_post->set_line_separator(undef);
                 chomp $line;
-                $sink_object->write_line($line);
+                $sink_object_post->write_line($line);
             }
         }
-
+        $sink_object_post->close_output_file();
         $source_object->close_input_file();
     }
 
@@ -2013,8 +2026,8 @@ EOM
     }
     else {
 
-        # output went to a file ...
-        if ($is_encoded_data) {
+        # output went to a file in 'tidy' mode...
+        if ( $is_encoded_data && $rOpts->{'format'} eq 'tidy' ) {
             $rstatus->{'output_encoded_as'} = 'UTF-8';
         }
     }
@@ -2030,7 +2043,10 @@ sub process_iteration_layer {
     my ( $self, $buf, $sink_object ) = @_;
 
     # Do all formatting, iterating if requested, on the source string $buf.
-    # Return the result in the $sink_object.
+    # Output depends on format type:
+    #   For 'tidy' formatting, output goes to sink object
+    #   For 'html' formatting, output goes to the ultimate destination
+    #   For 'user' formatting, user formatter handles output
 
     # Total formatting is done with three layers of subroutines:
     #   process_filter_layer    - do any pre and post processing
@@ -2040,6 +2056,8 @@ sub process_iteration_layer {
     # Data Flow in this layer:
     #      $buf -> [ loop over iterations ] -> $sink_object
 
+    # Only 'tidy' formatting can use multiple iterations.
+
     my $diagnostics_object = $self->[_diagnostics_object_];
     my $display_name       = $self->[_display_name_];
     my $fileroot           = $self->[_fileroot_];
@@ -2081,27 +2099,35 @@ sub process_iteration_layer {
         }
     }
 
+    # vars for iterations and convergence test
+    my $max_iterations = 1;
     my $convergence_log_message;
-    my $max_iterations = $rOpts->{'iterations'};
-
-    # check iteration count and quietly fix if necessary:
-    # - iterations option only applies to code beautification mode
-    # - the convergence check should stop most runs on iteration 2, and
-    #   virtually all on iteration 3.  But we'll allow up to 6.
-    if (   !defined($max_iterations)
-        || $max_iterations <= 0
-        || $rOpts->{'format'} ne 'tidy' )
-    {
-        $max_iterations = 1;
-    }
-    elsif ( $max_iterations > 6 ) { $max_iterations = 6 }
-
-    # get starting MD5 sum for convergence test
+    my $do_convergence_test;
     my %saw_md5;
-    my $do_convergence_test = $max_iterations > 1;
-    if ($do_convergence_test) {
-        my $digest = $md5_hex->($buf);
-        $saw_md5{$digest} = 0;
+
+    # Only 'tidy' formatting can use multiple iterations
+    if ( $rOpts->{'format'} eq 'tidy' ) {
+
+        # check iteration count and quietly fix if necessary:
+        # - iterations option only applies to code beautification mode
+        # - the convergence check should stop most runs on iteration 2, and
+        #   virtually all on iteration 3.  But we'll allow up to 6.
+        $max_iterations = $rOpts->{'iterations'};
+        if ( !defined($max_iterations)
+            || $max_iterations <= 0 )
+        {
+            $max_iterations = 1;
+        }
+        elsif ( $max_iterations > 6 ) {
+            $max_iterations = 6;
+        }
+
+        # get starting MD5 sum for convergence test
+        if ( $max_iterations > 1 ) {
+            $do_convergence_test = 1;
+            my $digest = $md5_hex->($buf);
+            $saw_md5{$digest} = 0;
+        }
     }
 
     # save objects to allow redirecting output during iterations
@@ -2122,7 +2148,6 @@ sub process_iteration_layer {
             $sink_object = Perl::Tidy::LineSink->new(
                 output_file     => \$sink_buffer,
                 line_separator  => $line_separator,
-                rOpts           => $rOpts,
                 is_encoded_data => $is_encoded_data,
             );
         }
@@ -2337,13 +2362,17 @@ EOM
         } ## end if ( $iter < $max_iterations)
     } ## end loop over iterations for one source file
 
+    $sink_object->close_output_file()    if $sink_object;
     $debugger_object->close_debug_file() if $debugger_object;
     $fh_tee->close()                     if $fh_tee;
+
+    # leave logger object open for additional messages
     $logger_object = $logger_object_final;
     $logger_object->write_logfile_entry($convergence_log_message)
       if $convergence_log_message;
 
     return;
+
 } ## end sub process_iteration_layer
 
 sub process_single_case {
diff --git a/lib/Perl/Tidy/LineSink.pm b/lib/Perl/Tidy/LineSink.pm
index c418a9f9..47dd561b 100644
--- a/lib/Perl/Tidy/LineSink.pm
+++ b/lib/Perl/Tidy/LineSink.pm
@@ -43,39 +43,22 @@ sub new {
     my %defaults = (
         output_file     => undef,
         line_separator  => undef,
-        rOpts           => undef,
         is_encoded_data => undef,
     );
     my %args = ( %defaults, @args );
 
     my $output_file     = $args{output_file};
     my $line_separator  = $args{line_separator};
-    my $rOpts           = $args{rOpts};
     my $is_encoded_data = $args{is_encoded_data};
 
     my $fh = undef;
 
     my $output_file_open = 0;
 
-    if ( $rOpts->{'format'} eq 'tidy' ) {
-        ( $fh, $output_file ) =
-          Perl::Tidy::streamhandle( $output_file, 'w', $is_encoded_data );
-        unless ($fh) { Perl::Tidy::Die("Cannot write to output stream\n"); }
-        $output_file_open = 1;
-    }
-
-    # in order to check output syntax when standard output is used,
-    # or when it is an object, we have to make a copy of the file
-    if ( $output_file eq '-' || ref $output_file ) {
-        if ( $rOpts->{'check-syntax'} ) {
-
-            # NOTE: THIS IS OLD CODING, left for safety. Should not get here.
-            # Turning off syntax check when standard output is used.
-            # The reason is that temporary files cause problems on
-            # on many systems.
-            $rOpts->{'check-syntax'} = 0;
-        }
-    }
+    ( $fh, $output_file ) =
+      Perl::Tidy::streamhandle( $output_file, 'w', $is_encoded_data );
+    unless ($fh) { Perl::Tidy::Die("Cannot write to output stream\n"); }
+    $output_file_open = 1;
 
     return bless {
         _fh               => $fh,
diff --git a/lib/Perl/Tidy/LineSource.pm b/lib/Perl/Tidy/LineSource.pm
index 288beb58..5ecf143e 100644
--- a/lib/Perl/Tidy/LineSource.pm
+++ b/lib/Perl/Tidy/LineSource.pm
@@ -61,19 +61,6 @@ sub new {
     ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
     return unless $fh;
 
-    # in order to check output syntax when standard output is used,
-    # or when it is an object, we have to make a copy of the file
-    if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
-    {
-
-        # NOTE: THIS IS OLD CODING, left for safety. Should not get here.
-        # Turning off syntax check when input output is used.
-        # The reason is that temporary files cause problems on
-        # on many systems.
-        $rOpts->{'check-syntax'} = 0;
-
-    }
-
     return bless {
         _fh                => $fh,
         _filename          => $input_file,
-- 
2.39.5