From: Don Armstrong <don@donarmstrong.com>
Date: Wed, 30 Sep 2015 03:31:32 +0000 (-0700)
Subject: Add IO::InnerFile support to Debbugs::Log and use it
X-Git-Tag: release/2.6.0~219
X-Git-Url: https://git.donarmstrong.com/?p=debbugs.git;a=commitdiff_plain;h=3ca3d92a6dd4a37cb69d801d80be95379a044d40

Add IO::InnerFile support to Debbugs::Log and use it

Use the inner_file handle in cgi/bugreport and Debbugs::CGI::Bugreport
to avoid having the entire file in memory all of the time

Eventually we will want to use an index to avoid having to read every
single line of the bug log for every single message. Some day. [If
you're reading this, you could make that day sooner!]
---

diff --git a/Debbugs/CGI/Bugreport.pm b/Debbugs/CGI/Bugreport.pm
index 0d7658e5..1121516f 100644
--- a/Debbugs/CGI/Bugreport.pm
+++ b/Debbugs/CGI/Bugreport.pm
@@ -39,6 +39,7 @@ use Debbugs::CGI qw(:url :html :util);
 use Debbugs::Common qw(globify_scalar english_join);
 use Debbugs::UTF8;
 use Debbugs::Config qw(:config);
+use Debbugs::Log qw(:read);
 use POSIX qw(strftime);
 use Encode qw(decode_utf8 encode_utf8);
 use URI::Escape qw(uri_escape_utf8);
@@ -313,22 +314,28 @@ appropriate.
 =cut
 
 sub handle_email_message{
-     my ($email,%param) = @_;
+     my ($record,%param) = @_;
 
      my $output;
      my $output_fh = globify_scalar(\$output);
      my $entity;
      my $tempdir;
-     if (not blessed $email) {
+     if (not blessed $record) {
          my $parser = MIME::Parser->new();
          # Because we are using memory, not tempfiles, there's no need to
          # clean up here like in Debbugs::MIME
          # this will be cleaned up once it goes out of scope
          $tempdir = File::Temp->newdir();
          $parser->output_under($tempdir->dirname());
-         $entity = $parser->parse_data( $email);
+         if ($record->{inner_file}) {
+             $entity = $parser->parse($record->{fh}) or
+                 die "Unable to parse entity";
+         } else {
+             $entity = $parser->parse_data($record->{text}) or
+                 die "Unable to parse entity";
+         }
      } else {
-         $entity = $email
+         $entity = $record;
      }
      my @attachments = ();
      my $raw_output =
@@ -366,7 +373,7 @@ sub handle_record{
      local $_ = $record->{type};
      if (/html/) {
 	 # $record->{text} is not in perl's internal encoding; convert it
-	 my $text = decode_rfc1522(decode_utf8($record->{text}));
+	 my $text = decode_rfc1522(decode_utf8(record_text($record)));
 	  my ($time) = $text =~ /<!--\s+time:(\d+)\s+-->/;
 	  my $class = $text =~ /^<strong>(?:Acknowledgement|Reply|Information|Report|Notification)/m ? 'infmessage':'msgreceived';
 	  $output .= $text;
@@ -412,8 +419,8 @@ sub handle_record{
 	  $output = qq(<div class="$class"><hr>\n<a name="$msg_number"></a>\n) . $output . "</div>\n";
      }
      elsif (/recips/) {
-	  my ($msg_id) = $record->{text} =~ /^Message-Id:\s+<(.+)>/im;
-	  if (defined $msg_id and exists $$seen_msg_ids{$msg_id}) {
+         my ($msg_id) = record_regex($record,qr/^Message-Id:\s+<(.+)>/i);
+         if (defined $msg_id and exists $$seen_msg_ids{$msg_id}) {
 	       return ();
 	  }
 	  elsif (defined $msg_id) {
@@ -421,7 +428,7 @@ sub handle_record{
 	  }
 	  $output .= qq(<hr><p class="msgreceived"><a name="$msg_number"></a>\n);
 	  $output .= 'View this message in <a href="' . html_escape(bug_links(bug=>$bug_number, links_only => 1, options=>{msg=>$msg_number, mbox=>'yes'})) . '">rfc822 format</a></p>';
-	  $output .= handle_email_message($record->{text},
+	  $output .= handle_email_message($record,
 					  ref     => $bug_number,
 					  msg_num => $msg_number,
                                           %param,
@@ -431,7 +438,7 @@ sub handle_record{
 	  # Do nothing
      }
      elsif (/incoming-recv/) {
-	  my ($msg_id) = $record->{text} =~ /^Message-Id:\s+<(.+)>/im;
+         my ($msg_id) = record_regex($record,qr/^Message-Id:\s+<(.+)>/i);
 	  if (defined $msg_id and exists $$seen_msg_ids{$msg_id}) {
 	       return ();
 	  }
@@ -439,7 +446,7 @@ sub handle_record{
 	       $$seen_msg_ids{$msg_id} = 1;
 	  }
 	  # Incomming Mail Message
-	  my ($received,$hostname) = $record->{text} =~ m/Received: \(at (\S+)\) by (\S+)\;/;
+	  my ($received,$hostname) = record_regex($record,qr/Received: \(at (\S+)\) by (\S+)\;/o);
 	  $output .= qq|<hr><p class="msgreceived"><a name="$msg_number"></a><a name="msg$msg_number"></a><a href="#$msg_number">Message #$msg_number</a> received at |.
 	       html_escape("$received\@$hostname") .
 		    q| (<a href="| . html_escape(bug_links(bug => $bug_number, links_only => 1, options => {msg=>$msg_number})) . '">full text</a>'.
@@ -454,7 +461,12 @@ sub handle_record{
           # this will be cleaned up once it goes out of scope
           my $tempdir = File::Temp->newdir();
           $parser->output_under($tempdir->dirname());
-          my $entity = $parser->parse_data($record->{text});
+         my $entity;
+         if ($record->{inner_file}) {
+             $entity = $parser->parse($record->{fh});
+         } else {
+             $entity = $parser->parse_data($record->{text});
+         }
           my $r_l = reply_headers($entity);
           $output .= q(<a href=").
               html_escape('mailto:'.$bug_number.'@'.$config{email_domain}.'?'.
diff --git a/Debbugs/Log.pm b/Debbugs/Log.pm
index 551fd392..dce86d5c 100644
--- a/Debbugs/Log.pm
+++ b/Debbugs/Log.pm
@@ -25,7 +25,7 @@ BEGIN {
     @EXPORT = ();
     %EXPORT_TAGS = (write => [qw(write_log_records),
 			     ],
-		    read  => [qw(read_log_records),
+		    read  => [qw(read_log_records record_text record_regex),
 			     ],
 		    misc  => [qw(escape_log),
 			     ],
@@ -40,6 +40,7 @@ use Carp;
 use Debbugs::Common qw(getbuglocation getbugcomponent make_list);
 use Params::Validate qw(:types validate_with);
 use Encode qw(encode encode_utf8 is_utf8);
+use IO::InnerFile;
 
 =head1 NAME
 
@@ -169,6 +170,7 @@ sub new
     my %param;
     if (@_ == 1) {
 	 ($param{logfh}) = @_;
+	 $param{inner_file} = 0;
     }
     else {
 	 %param = validate_with(params => \@_,
@@ -180,7 +182,10 @@ sub new
 						      },
 					   log_name => {type => SCALAR,
 							optional => 1,
-						       },
+                                   },
+                           inner_file => {type => BOOLEAN,
+                                          default => 0,
+                                         },
 					  }
 			       );
     }
@@ -208,6 +213,7 @@ sub new
 
     $self->{state} = 'kill-init';
     $self->{linenum} = 0;
+    $self->{inner_file} = $param{inner_file};
     return $self;
 }
 
@@ -245,16 +251,21 @@ sub read_record
 	    }
 
 	    $this->{state} = $newstate;
-
 	    if ($this->{state} =~ /^(autocheck|recips|html|incoming-recv)$/) {
-		$record->{type} = $this->{state};
+            $record->{type} = $this->{state};
+            $record->{start} = $logfh->tell;
+            $record->{stop} = $logfh->tell;
+            $record->{inner_file} = $this->{inner_file};
 	    } elsif ($this->{state} eq 'kill-end') {
+            if ($this->{inner_file}) {
+                $record->{fh} = IO::InnerFile->new($logfh,$record->{start},$record->{stop} - $record->{start})
+            }
 		return $record;
 	    }
 
 	    next;
 	}
-
+    $record->{stop} = $logfh->tell;
 	$_ = $line;
 	if ($this->{state} eq 'incoming-recv') {
 	    my $pl = $_;
@@ -262,14 +273,14 @@ sub read_record
 		die "bad line '$pl' in state incoming-recv";
 	    }
 	    $this->{state} = 'go';
-	    $record->{text} .= "$_\n";
+	    $record->{text} .= "$_\n" unless $this->{inner_file};
 	} elsif ($this->{state} eq 'html') {
-	    $record->{text} .= "$_\n";
+	    $record->{text} .= "$_\n"  unless $this->{inner_file};
 	} elsif ($this->{state} eq 'go') {
 	    s/^\030//;
-	    $record->{text} .= "$_\n";
+	    $record->{text} .= "$_\n"  unless $this->{inner_file};
 	} elsif ($this->{state} eq 'go-nox') {
-	    $record->{text} .= "$_\n";
+	    $record->{text} .= "$_\n"  unless $this->{inner_file};
 	} elsif ($this->{state} eq 'recips') {
 	    if (/^-t$/) {
 		undef $record->{recips};
@@ -279,11 +290,11 @@ sub read_record
 	    }
 	    $this->{state} = 'kill-body';
 	} elsif ($this->{state} eq 'autocheck') {
-	    $record->{text} .= "$_\n";
+	    $record->{text} .= "$_\n" unless $this->{inner_file};
 	    next if !/^X-Debian-Bugs(-\w+)?: This is an autoforward from (\S+)/;
 	    $this->{state} = 'autowait';
 	} elsif ($this->{state} eq 'autowait') {
-	    $record->{text} .= "$_\n";
+	    $record->{text} .= "$_\n" unless $this->{inner_file};
 	    next if !/^$/;
 	    $this->{state} = 'go-nox';
 	} else {
@@ -326,6 +337,9 @@ sub read_log_records
 					   log_name => {type => SCALAR,
 							optional => 1,
 						       },
+                           inner_file => {type => BOOLEAN,
+                                          default => 0,
+                                         },
 					  }
 			       );
     }
@@ -452,6 +466,41 @@ sub escape_log {
 }
 
 
+sub record_text {
+    my ($record) = @_;
+    if ($record->{inner_file}) {
+        local $/;
+        my $text;
+        my $t = $record->{fh};
+        $text = <$t>;
+        $record->{fh}->seek(0,0);
+        return $text;
+    } else {
+        return $record->{text};
+    }
+}
+
+sub record_regex {
+    my ($record,$regex) = @_;
+    if ($record->{inner_file}) {
+        my @result;
+        my $fh = $record->{fh};
+        while (<$fh>) {
+            if (@result = $_ =~ m/$regex/) {
+                $record->{fh}->seek(0,0);
+                return @result;
+            }
+        }
+        $record->{fh}->seek(0,0);
+        return ();
+    } else {
+        my @result = $record->{text} =~ m/$regex/;
+        return @result;
+        return $record->{text};
+    }
+}
+
+
 =head1 CAVEATS
 
 This module does none of the formatting that bugreport.cgi et al do. It's
diff --git a/cgi/bugreport.cgi b/cgi/bugreport.cgi
index 9064ca8c..0a3071a3 100755
--- a/cgi/bugreport.cgi
+++ b/cgi/bugreport.cgi
@@ -156,7 +156,7 @@ my %status =
 
 my @records;
 eval{
-     @records = read_log_records($buglogfh);
+     @records = read_log_records(logfh => $buglogfh,inner_file => 1);
 };
 if ($@) {
      quitcgi("Bad bug log for $gBug $ref. Unable to read records: $@");
@@ -225,23 +225,30 @@ END
 	  my $wanted_type = $mbox_maint?'recips':'incoming-recv';
 	  # we want to include control messages anyway
 	  my $record_wanted_anyway = 0;
-	  my ($msg_id) = $record->{text} =~ /^Message-Id:\s+<(.+)>/im;
+	  my ($msg_id) = record_regex($record,qr/^Message-Id:\s+<(.+)>/im);
 	  next if defined $msg_id and exists $seen_message_ids{$msg_id};
 	  next if defined $msg_id and $msg_id =~/handler\..+\.ack(?:info|done)?\@/;
-	  $record_wanted_anyway = 1 if $record->{text} =~ /^Received: \(at control\)/;
+	  $record_wanted_anyway = 1 if record_regex($record,qr/^Received: \(at control\)/);
 	  next if not $boring and not $record->{type} eq $wanted_type and not $record_wanted_anyway and @records > 1;
 	  $seen_message_ids{$msg_id} = 1 if defined $msg_id;
-	  my @lines = split( "\n", $record->{text}, -1 );
+      my @lines;
+      if ($record->{inner_file}) {
+          push @lines, $record->{fh}->getline;
+          push @lines, $record->{fh}->getline;
+      } else {
+          @lines = split( "\n", $record->{text}, -1 );
+      }
 	  if ( $lines[ 1 ] =~ m/^From / ) {
-	       my $tmp = $lines[ 0 ];
-	       $lines[ 0 ] = $lines[ 1 ];
-	       $lines[ 1 ] = $tmp;
+          @lines = reverse @lines;
 	  }
 	  if ( !( $lines[ 0 ] =~ m/^From / ) ) {
 	       unshift @lines, "From unknown $date";
 	  }
-	  map { s/^(>*From )/>$1/ } @lines[ 1 .. $#lines ];
-	  print join( "\n", @lines ) . "\n";
+	  print map { s/^(>*From )/>$1/; $_."\n" } @lines[ 1 .. $#lines ];
+      if ($record->{inner_file}) {
+          my $fh = $record->{fh};
+          print <$fh>;
+      }
      }
      exit 0;
 }
@@ -250,7 +257,7 @@ else {
      if (defined $att and defined $msg and @records) {
 	 binmode(STDOUT,":raw");
 	  $msg_num++;
-	  print handle_email_message($records[0]->{text},
+	  print handle_email_message($records[0],
 				     ref => $ref,
 				     msg_num => $msg_num,
 				     att => $att,