From: Don Armstrong Date: Wed, 30 Sep 2015 03:31:32 +0000 (-0700) Subject: Add IO::InnerFile support to Debbugs::Log and use it X-Git-Tag: release/2.6.0~219 X-Git-Url: https://git.donarmstrong.com/?p=debbugs.git;a=commitdiff_plain;h=3ca3d92a6dd4a37cb69d801d80be95379a044d40 Add IO::InnerFile support to Debbugs::Log and use it Use the inner_file handle in cgi/bugreport and Debbugs::CGI::Bugreport to avoid having the entire file in memory all of the time Eventually we will want to use an index to avoid having to read every single line of the bug log for every single message. Some day. [If you're reading this, you could make that day sooner!] --- diff --git a/Debbugs/CGI/Bugreport.pm b/Debbugs/CGI/Bugreport.pm index 0d7658e..1121516 100644 --- a/Debbugs/CGI/Bugreport.pm +++ b/Debbugs/CGI/Bugreport.pm @@ -39,6 +39,7 @@ use Debbugs::CGI qw(:url :html :util); use Debbugs::Common qw(globify_scalar english_join); use Debbugs::UTF8; use Debbugs::Config qw(:config); +use Debbugs::Log qw(:read); use POSIX qw(strftime); use Encode qw(decode_utf8 encode_utf8); use URI::Escape qw(uri_escape_utf8); @@ -313,22 +314,28 @@ appropriate. =cut sub handle_email_message{ - my ($email,%param) = @_; + my ($record,%param) = @_; my $output; my $output_fh = globify_scalar(\$output); my $entity; my $tempdir; - if (not blessed $email) { + if (not blessed $record) { my $parser = MIME::Parser->new(); # Because we are using memory, not tempfiles, there's no need to # clean up here like in Debbugs::MIME # this will be cleaned up once it goes out of scope $tempdir = File::Temp->newdir(); $parser->output_under($tempdir->dirname()); - $entity = $parser->parse_data( $email); + if ($record->{inner_file}) { + $entity = $parser->parse($record->{fh}) or + die "Unable to parse entity"; + } else { + $entity = $parser->parse_data($record->{text}) or + die "Unable to parse entity"; + } } else { - $entity = $email + $entity = $record; } my @attachments = (); my $raw_output = @@ -366,7 +373,7 @@ sub handle_record{ local $_ = $record->{type}; if (/html/) { # $record->{text} is not in perl's internal encoding; convert it - my $text = decode_rfc1522(decode_utf8($record->{text})); + my $text = decode_rfc1522(decode_utf8(record_text($record))); my ($time) = $text =~ //; my $class = $text =~ /^(?:Acknowledgement|Reply|Information|Report|Notification)/m ? 'infmessage':'msgreceived'; $output .= $text; @@ -412,8 +419,8 @@ sub handle_record{ $output = qq(

\n\n) . $output . "
\n"; } elsif (/recips/) { - my ($msg_id) = $record->{text} =~ /^Message-Id:\s+<(.+)>/im; - if (defined $msg_id and exists $$seen_msg_ids{$msg_id}) { + my ($msg_id) = record_regex($record,qr/^Message-Id:\s+<(.+)>/i); + if (defined $msg_id and exists $$seen_msg_ids{$msg_id}) { return (); } elsif (defined $msg_id) { @@ -421,7 +428,7 @@ sub handle_record{ } $output .= qq(

\n); $output .= 'View this message in rfc822 format

'; - $output .= handle_email_message($record->{text}, + $output .= handle_email_message($record, ref => $bug_number, msg_num => $msg_number, %param, @@ -431,7 +438,7 @@ sub handle_record{ # Do nothing } elsif (/incoming-recv/) { - my ($msg_id) = $record->{text} =~ /^Message-Id:\s+<(.+)>/im; + my ($msg_id) = record_regex($record,qr/^Message-Id:\s+<(.+)>/i); if (defined $msg_id and exists $$seen_msg_ids{$msg_id}) { return (); } @@ -439,7 +446,7 @@ sub handle_record{ $$seen_msg_ids{$msg_id} = 1; } # Incomming Mail Message - my ($received,$hostname) = $record->{text} =~ m/Received: \(at (\S+)\) by (\S+)\;/; + my ($received,$hostname) = record_regex($record,qr/Received: \(at (\S+)\) by (\S+)\;/o); $output .= qq|

Message #$msg_number received at |. html_escape("$received\@$hostname") . q| (full text'. @@ -454,7 +461,12 @@ sub handle_record{ # this will be cleaned up once it goes out of scope my $tempdir = File::Temp->newdir(); $parser->output_under($tempdir->dirname()); - my $entity = $parser->parse_data($record->{text}); + my $entity; + if ($record->{inner_file}) { + $entity = $parser->parse($record->{fh}); + } else { + $entity = $parser->parse_data($record->{text}); + } my $r_l = reply_headers($entity); $output .= q({state} = 'go'; - $record->{text} .= "$_\n"; + $record->{text} .= "$_\n" unless $this->{inner_file}; } elsif ($this->{state} eq 'html') { - $record->{text} .= "$_\n"; + $record->{text} .= "$_\n" unless $this->{inner_file}; } elsif ($this->{state} eq 'go') { s/^\030//; - $record->{text} .= "$_\n"; + $record->{text} .= "$_\n" unless $this->{inner_file}; } elsif ($this->{state} eq 'go-nox') { - $record->{text} .= "$_\n"; + $record->{text} .= "$_\n" unless $this->{inner_file}; } elsif ($this->{state} eq 'recips') { if (/^-t$/) { undef $record->{recips}; @@ -279,11 +290,11 @@ sub read_record } $this->{state} = 'kill-body'; } elsif ($this->{state} eq 'autocheck') { - $record->{text} .= "$_\n"; + $record->{text} .= "$_\n" unless $this->{inner_file}; next if !/^X-Debian-Bugs(-\w+)?: This is an autoforward from (\S+)/; $this->{state} = 'autowait'; } elsif ($this->{state} eq 'autowait') { - $record->{text} .= "$_\n"; + $record->{text} .= "$_\n" unless $this->{inner_file}; next if !/^$/; $this->{state} = 'go-nox'; } else { @@ -326,6 +337,9 @@ sub read_log_records log_name => {type => SCALAR, optional => 1, }, + inner_file => {type => BOOLEAN, + default => 0, + }, } ); } @@ -452,6 +466,41 @@ sub escape_log { } +sub record_text { + my ($record) = @_; + if ($record->{inner_file}) { + local $/; + my $text; + my $t = $record->{fh}; + $text = <$t>; + $record->{fh}->seek(0,0); + return $text; + } else { + return $record->{text}; + } +} + +sub record_regex { + my ($record,$regex) = @_; + if ($record->{inner_file}) { + my @result; + my $fh = $record->{fh}; + while (<$fh>) { + if (@result = $_ =~ m/$regex/) { + $record->{fh}->seek(0,0); + return @result; + } + } + $record->{fh}->seek(0,0); + return (); + } else { + my @result = $record->{text} =~ m/$regex/; + return @result; + return $record->{text}; + } +} + + =head1 CAVEATS This module does none of the formatting that bugreport.cgi et al do. It's diff --git a/cgi/bugreport.cgi b/cgi/bugreport.cgi index 9064ca8..0a3071a 100755 --- a/cgi/bugreport.cgi +++ b/cgi/bugreport.cgi @@ -156,7 +156,7 @@ my %status = my @records; eval{ - @records = read_log_records($buglogfh); + @records = read_log_records(logfh => $buglogfh,inner_file => 1); }; if ($@) { quitcgi("Bad bug log for $gBug $ref. Unable to read records: $@"); @@ -225,23 +225,30 @@ END my $wanted_type = $mbox_maint?'recips':'incoming-recv'; # we want to include control messages anyway my $record_wanted_anyway = 0; - my ($msg_id) = $record->{text} =~ /^Message-Id:\s+<(.+)>/im; + my ($msg_id) = record_regex($record,qr/^Message-Id:\s+<(.+)>/im); next if defined $msg_id and exists $seen_message_ids{$msg_id}; next if defined $msg_id and $msg_id =~/handler\..+\.ack(?:info|done)?\@/; - $record_wanted_anyway = 1 if $record->{text} =~ /^Received: \(at control\)/; + $record_wanted_anyway = 1 if record_regex($record,qr/^Received: \(at control\)/); next if not $boring and not $record->{type} eq $wanted_type and not $record_wanted_anyway and @records > 1; $seen_message_ids{$msg_id} = 1 if defined $msg_id; - my @lines = split( "\n", $record->{text}, -1 ); + my @lines; + if ($record->{inner_file}) { + push @lines, $record->{fh}->getline; + push @lines, $record->{fh}->getline; + } else { + @lines = split( "\n", $record->{text}, -1 ); + } if ( $lines[ 1 ] =~ m/^From / ) { - my $tmp = $lines[ 0 ]; - $lines[ 0 ] = $lines[ 1 ]; - $lines[ 1 ] = $tmp; + @lines = reverse @lines; } if ( !( $lines[ 0 ] =~ m/^From / ) ) { unshift @lines, "From unknown $date"; } - map { s/^(>*From )/>$1/ } @lines[ 1 .. $#lines ]; - print join( "\n", @lines ) . "\n"; + print map { s/^(>*From )/>$1/; $_."\n" } @lines[ 1 .. $#lines ]; + if ($record->{inner_file}) { + my $fh = $record->{fh}; + print <$fh>; + } } exit 0; } @@ -250,7 +257,7 @@ else { if (defined $att and defined $msg and @records) { binmode(STDOUT,":raw"); $msg_num++; - print handle_email_message($records[0]->{text}, + print handle_email_message($records[0], ref => $ref, msg_num => $msg_num, att => $att,