Use the inner_file handle in cgi/bugreport and Debbugs::CGI::Bugreport
to avoid having the entire file in memory all of the time
Eventually we will want to use an index to avoid having to read every
single line of the bug log for every single message. Some day. [If
you're reading this, you could make that day sooner!]
use Debbugs::Common qw(globify_scalar english_join);
use Debbugs::UTF8;
use Debbugs::Config qw(:config);
use Debbugs::Common qw(globify_scalar english_join);
use Debbugs::UTF8;
use Debbugs::Config qw(:config);
+use Debbugs::Log qw(:read);
use POSIX qw(strftime);
use Encode qw(decode_utf8 encode_utf8);
use URI::Escape qw(uri_escape_utf8);
use POSIX qw(strftime);
use Encode qw(decode_utf8 encode_utf8);
use URI::Escape qw(uri_escape_utf8);
=cut
sub handle_email_message{
=cut
sub handle_email_message{
- my ($email,%param) = @_;
+ my ($record,%param) = @_;
my $output;
my $output_fh = globify_scalar(\$output);
my $entity;
my $tempdir;
my $output;
my $output_fh = globify_scalar(\$output);
my $entity;
my $tempdir;
- if (not blessed $email) {
+ if (not blessed $record) {
my $parser = MIME::Parser->new();
# Because we are using memory, not tempfiles, there's no need to
# clean up here like in Debbugs::MIME
# this will be cleaned up once it goes out of scope
$tempdir = File::Temp->newdir();
$parser->output_under($tempdir->dirname());
my $parser = MIME::Parser->new();
# Because we are using memory, not tempfiles, there's no need to
# clean up here like in Debbugs::MIME
# this will be cleaned up once it goes out of scope
$tempdir = File::Temp->newdir();
$parser->output_under($tempdir->dirname());
- $entity = $parser->parse_data( $email);
+ if ($record->{inner_file}) {
+ $entity = $parser->parse($record->{fh}) or
+ die "Unable to parse entity";
+ } else {
+ $entity = $parser->parse_data($record->{text}) or
+ die "Unable to parse entity";
+ }
}
my @attachments = ();
my $raw_output =
}
my @attachments = ();
my $raw_output =
local $_ = $record->{type};
if (/html/) {
# $record->{text} is not in perl's internal encoding; convert it
local $_ = $record->{type};
if (/html/) {
# $record->{text} is not in perl's internal encoding; convert it
- my $text = decode_rfc1522(decode_utf8($record->{text}));
+ my $text = decode_rfc1522(decode_utf8(record_text($record)));
my ($time) = $text =~ /<!--\s+time:(\d+)\s+-->/;
my $class = $text =~ /^<strong>(?:Acknowledgement|Reply|Information|Report|Notification)/m ? 'infmessage':'msgreceived';
$output .= $text;
my ($time) = $text =~ /<!--\s+time:(\d+)\s+-->/;
my $class = $text =~ /^<strong>(?:Acknowledgement|Reply|Information|Report|Notification)/m ? 'infmessage':'msgreceived';
$output .= $text;
$output = qq(<div class="$class"><hr>\n<a name="$msg_number"></a>\n) . $output . "</div>\n";
}
elsif (/recips/) {
$output = qq(<div class="$class"><hr>\n<a name="$msg_number"></a>\n) . $output . "</div>\n";
}
elsif (/recips/) {
- my ($msg_id) = $record->{text} =~ /^Message-Id:\s+<(.+)>/im;
- if (defined $msg_id and exists $$seen_msg_ids{$msg_id}) {
+ my ($msg_id) = record_regex($record,qr/^Message-Id:\s+<(.+)>/i);
+ if (defined $msg_id and exists $$seen_msg_ids{$msg_id}) {
return ();
}
elsif (defined $msg_id) {
return ();
}
elsif (defined $msg_id) {
}
$output .= qq(<hr><p class="msgreceived"><a name="$msg_number"></a>\n);
$output .= 'View this message in <a href="' . html_escape(bug_links(bug=>$bug_number, links_only => 1, options=>{msg=>$msg_number, mbox=>'yes'})) . '">rfc822 format</a></p>';
}
$output .= qq(<hr><p class="msgreceived"><a name="$msg_number"></a>\n);
$output .= 'View this message in <a href="' . html_escape(bug_links(bug=>$bug_number, links_only => 1, options=>{msg=>$msg_number, mbox=>'yes'})) . '">rfc822 format</a></p>';
- $output .= handle_email_message($record->{text},
+ $output .= handle_email_message($record,
ref => $bug_number,
msg_num => $msg_number,
%param,
ref => $bug_number,
msg_num => $msg_number,
%param,
# Do nothing
}
elsif (/incoming-recv/) {
# Do nothing
}
elsif (/incoming-recv/) {
- my ($msg_id) = $record->{text} =~ /^Message-Id:\s+<(.+)>/im;
+ my ($msg_id) = record_regex($record,qr/^Message-Id:\s+<(.+)>/i);
if (defined $msg_id and exists $$seen_msg_ids{$msg_id}) {
return ();
}
if (defined $msg_id and exists $$seen_msg_ids{$msg_id}) {
return ();
}
$$seen_msg_ids{$msg_id} = 1;
}
# Incomming Mail Message
$$seen_msg_ids{$msg_id} = 1;
}
# Incomming Mail Message
- my ($received,$hostname) = $record->{text} =~ m/Received: \(at (\S+)\) by (\S+)\;/;
+ my ($received,$hostname) = record_regex($record,qr/Received: \(at (\S+)\) by (\S+)\;/o);
$output .= qq|<hr><p class="msgreceived"><a name="$msg_number"></a><a name="msg$msg_number"></a><a href="#$msg_number">Message #$msg_number</a> received at |.
html_escape("$received\@$hostname") .
q| (<a href="| . html_escape(bug_links(bug => $bug_number, links_only => 1, options => {msg=>$msg_number})) . '">full text</a>'.
$output .= qq|<hr><p class="msgreceived"><a name="$msg_number"></a><a name="msg$msg_number"></a><a href="#$msg_number">Message #$msg_number</a> received at |.
html_escape("$received\@$hostname") .
q| (<a href="| . html_escape(bug_links(bug => $bug_number, links_only => 1, options => {msg=>$msg_number})) . '">full text</a>'.
# this will be cleaned up once it goes out of scope
my $tempdir = File::Temp->newdir();
$parser->output_under($tempdir->dirname());
# this will be cleaned up once it goes out of scope
my $tempdir = File::Temp->newdir();
$parser->output_under($tempdir->dirname());
- my $entity = $parser->parse_data($record->{text});
+ my $entity;
+ if ($record->{inner_file}) {
+ $entity = $parser->parse($record->{fh});
+ } else {
+ $entity = $parser->parse_data($record->{text});
+ }
my $r_l = reply_headers($entity);
$output .= q(<a href=").
html_escape('mailto:'.$bug_number.'@'.$config{email_domain}.'?'.
my $r_l = reply_headers($entity);
$output .= q(<a href=").
html_escape('mailto:'.$bug_number.'@'.$config{email_domain}.'?'.
@EXPORT = ();
%EXPORT_TAGS = (write => [qw(write_log_records),
],
@EXPORT = ();
%EXPORT_TAGS = (write => [qw(write_log_records),
],
- read => [qw(read_log_records),
+ read => [qw(read_log_records record_text record_regex),
],
misc => [qw(escape_log),
],
],
misc => [qw(escape_log),
],
use Debbugs::Common qw(getbuglocation getbugcomponent make_list);
use Params::Validate qw(:types validate_with);
use Encode qw(encode encode_utf8 is_utf8);
use Debbugs::Common qw(getbuglocation getbugcomponent make_list);
use Params::Validate qw(:types validate_with);
use Encode qw(encode encode_utf8 is_utf8);
my %param;
if (@_ == 1) {
($param{logfh}) = @_;
my %param;
if (@_ == 1) {
($param{logfh}) = @_;
+ $param{inner_file} = 0;
}
else {
%param = validate_with(params => \@_,
}
else {
%param = validate_with(params => \@_,
},
log_name => {type => SCALAR,
optional => 1,
},
log_name => {type => SCALAR,
optional => 1,
+ },
+ inner_file => {type => BOOLEAN,
+ default => 0,
+ },
$self->{state} = 'kill-init';
$self->{linenum} = 0;
$self->{state} = 'kill-init';
$self->{linenum} = 0;
+ $self->{inner_file} = $param{inner_file};
}
$this->{state} = $newstate;
}
$this->{state} = $newstate;
if ($this->{state} =~ /^(autocheck|recips|html|incoming-recv)$/) {
if ($this->{state} =~ /^(autocheck|recips|html|incoming-recv)$/) {
- $record->{type} = $this->{state};
+ $record->{type} = $this->{state};
+ $record->{start} = $logfh->tell;
+ $record->{stop} = $logfh->tell;
+ $record->{inner_file} = $this->{inner_file};
} elsif ($this->{state} eq 'kill-end') {
} elsif ($this->{state} eq 'kill-end') {
+ if ($this->{inner_file}) {
+ $record->{fh} = IO::InnerFile->new($logfh,$record->{start},$record->{stop} - $record->{start})
+ }
return $record;
}
next;
}
return $record;
}
next;
}
+ $record->{stop} = $logfh->tell;
$_ = $line;
if ($this->{state} eq 'incoming-recv') {
my $pl = $_;
$_ = $line;
if ($this->{state} eq 'incoming-recv') {
my $pl = $_;
die "bad line '$pl' in state incoming-recv";
}
$this->{state} = 'go';
die "bad line '$pl' in state incoming-recv";
}
$this->{state} = 'go';
- $record->{text} .= "$_\n";
+ $record->{text} .= "$_\n" unless $this->{inner_file};
} elsif ($this->{state} eq 'html') {
} elsif ($this->{state} eq 'html') {
- $record->{text} .= "$_\n";
+ $record->{text} .= "$_\n" unless $this->{inner_file};
} elsif ($this->{state} eq 'go') {
s/^\030//;
} elsif ($this->{state} eq 'go') {
s/^\030//;
- $record->{text} .= "$_\n";
+ $record->{text} .= "$_\n" unless $this->{inner_file};
} elsif ($this->{state} eq 'go-nox') {
} elsif ($this->{state} eq 'go-nox') {
- $record->{text} .= "$_\n";
+ $record->{text} .= "$_\n" unless $this->{inner_file};
} elsif ($this->{state} eq 'recips') {
if (/^-t$/) {
undef $record->{recips};
} elsif ($this->{state} eq 'recips') {
if (/^-t$/) {
undef $record->{recips};
}
$this->{state} = 'kill-body';
} elsif ($this->{state} eq 'autocheck') {
}
$this->{state} = 'kill-body';
} elsif ($this->{state} eq 'autocheck') {
- $record->{text} .= "$_\n";
+ $record->{text} .= "$_\n" unless $this->{inner_file};
next if !/^X-Debian-Bugs(-\w+)?: This is an autoforward from (\S+)/;
$this->{state} = 'autowait';
} elsif ($this->{state} eq 'autowait') {
next if !/^X-Debian-Bugs(-\w+)?: This is an autoforward from (\S+)/;
$this->{state} = 'autowait';
} elsif ($this->{state} eq 'autowait') {
- $record->{text} .= "$_\n";
+ $record->{text} .= "$_\n" unless $this->{inner_file};
next if !/^$/;
$this->{state} = 'go-nox';
} else {
next if !/^$/;
$this->{state} = 'go-nox';
} else {
log_name => {type => SCALAR,
optional => 1,
},
log_name => {type => SCALAR,
optional => 1,
},
+ inner_file => {type => BOOLEAN,
+ default => 0,
+ },
+sub record_text {
+ my ($record) = @_;
+ if ($record->{inner_file}) {
+ local $/;
+ my $text;
+ my $t = $record->{fh};
+ $text = <$t>;
+ $record->{fh}->seek(0,0);
+ return $text;
+ } else {
+ return $record->{text};
+ }
+}
+
+sub record_regex {
+ my ($record,$regex) = @_;
+ if ($record->{inner_file}) {
+ my @result;
+ my $fh = $record->{fh};
+ while (<$fh>) {
+ if (@result = $_ =~ m/$regex/) {
+ $record->{fh}->seek(0,0);
+ return @result;
+ }
+ }
+ $record->{fh}->seek(0,0);
+ return ();
+ } else {
+ my @result = $record->{text} =~ m/$regex/;
+ return @result;
+ return $record->{text};
+ }
+}
+
+
=head1 CAVEATS
This module does none of the formatting that bugreport.cgi et al do. It's
=head1 CAVEATS
This module does none of the formatting that bugreport.cgi et al do. It's
- @records = read_log_records($buglogfh);
+ @records = read_log_records(logfh => $buglogfh,inner_file => 1);
};
if ($@) {
quitcgi("Bad bug log for $gBug $ref. Unable to read records: $@");
};
if ($@) {
quitcgi("Bad bug log for $gBug $ref. Unable to read records: $@");
my $wanted_type = $mbox_maint?'recips':'incoming-recv';
# we want to include control messages anyway
my $record_wanted_anyway = 0;
my $wanted_type = $mbox_maint?'recips':'incoming-recv';
# we want to include control messages anyway
my $record_wanted_anyway = 0;
- my ($msg_id) = $record->{text} =~ /^Message-Id:\s+<(.+)>/im;
+ my ($msg_id) = record_regex($record,qr/^Message-Id:\s+<(.+)>/im);
next if defined $msg_id and exists $seen_message_ids{$msg_id};
next if defined $msg_id and $msg_id =~/handler\..+\.ack(?:info|done)?\@/;
next if defined $msg_id and exists $seen_message_ids{$msg_id};
next if defined $msg_id and $msg_id =~/handler\..+\.ack(?:info|done)?\@/;
- $record_wanted_anyway = 1 if $record->{text} =~ /^Received: \(at control\)/;
+ $record_wanted_anyway = 1 if record_regex($record,qr/^Received: \(at control\)/);
next if not $boring and not $record->{type} eq $wanted_type and not $record_wanted_anyway and @records > 1;
$seen_message_ids{$msg_id} = 1 if defined $msg_id;
next if not $boring and not $record->{type} eq $wanted_type and not $record_wanted_anyway and @records > 1;
$seen_message_ids{$msg_id} = 1 if defined $msg_id;
- my @lines = split( "\n", $record->{text}, -1 );
+ my @lines;
+ if ($record->{inner_file}) {
+ push @lines, $record->{fh}->getline;
+ push @lines, $record->{fh}->getline;
+ } else {
+ @lines = split( "\n", $record->{text}, -1 );
+ }
if ( $lines[ 1 ] =~ m/^From / ) {
if ( $lines[ 1 ] =~ m/^From / ) {
- my $tmp = $lines[ 0 ];
- $lines[ 0 ] = $lines[ 1 ];
- $lines[ 1 ] = $tmp;
+ @lines = reverse @lines;
}
if ( !( $lines[ 0 ] =~ m/^From / ) ) {
unshift @lines, "From unknown $date";
}
}
if ( !( $lines[ 0 ] =~ m/^From / ) ) {
unshift @lines, "From unknown $date";
}
- map { s/^(>*From )/>$1/ } @lines[ 1 .. $#lines ];
- print join( "\n", @lines ) . "\n";
+ print map { s/^(>*From )/>$1/; $_."\n" } @lines[ 1 .. $#lines ];
+ if ($record->{inner_file}) {
+ my $fh = $record->{fh};
+ print <$fh>;
+ }
if (defined $att and defined $msg and @records) {
binmode(STDOUT,":raw");
$msg_num++;
if (defined $att and defined $msg and @records) {
binmode(STDOUT,":raw");
$msg_num++;
- print handle_email_message($records[0]->{text},
+ print handle_email_message($records[0],
ref => $ref,
msg_num => $msg_num,
att => $att,
ref => $ref,
msg_num => $msg_num,
att => $att,