1 # This module is part of debbugs, and is released
2 # under the terms of the GPL version 2, or any later
3 # version at your option.
4 # See the file README and COPYING for more information.
6 # [Other people have contributed to this file; their copyrights should
8 # Copyright 2004 by Collin Watson <cjwatson@debian.org>
9 # Copyright 2007 by Don Armstrong <don@donarmstrong.com>
18 use vars qw($VERSION $DEBUG @EXPORT @EXPORT_OK %EXPORT_TAGS);
19 use Exporter qw(import);
23 $DEBUG = 0 unless defined $DEBUG;
26 %EXPORT_TAGS = (write => [qw(write_log_records),
28 read => [qw(read_log_records record_text record_regex),
30 misc => [qw(escape_log),
34 Exporter::export_ok_tags(qw(write read misc));
35 $EXPORT_TAGS{all} = [@EXPORT_OK];
40 use Debbugs::Common qw(getbuglocation getbugcomponent make_list);
41 use Params::Validate qw(:types validate_with);
42 use Encode qw(encode encode_utf8 is_utf8);
48 Debbugs::Log - an interface to debbugs .log files
52 The Debbugs::Log module provides a convenient way for scripts to read and
53 write the .log files used by debbugs to store the complete textual records
54 of all bug transactions.
56 Debbugs::Log does not decode utf8 into perl's internal encoding or
57 encode into utf8 from perl's internal encoding. For html records and
58 all recips, this should probably be done. For other records, this should
61 =head2 The .log File Format
63 .log files consist of a sequence of records, of one of the following four
64 types. ^A, ^B, etc. represent those control characters.
74 C<[mail]> must start with /^Received: \(at \S+\) by \S+;/, and is copied to
79 Auto-forwarded messages are recorded like this:
85 C<[mail]> must contain /^X-Debian-Bugs(-\w+)?: This is an autoforward from
86 \S+/. The first line matching that is removed; all lines in the message body
87 that begin with 'X' will be copied to the output, minus the 'X'.
89 Nothing in debbugs actually generates this record type any more, but it may
90 still be in old .logs at some sites.
95 [recip]^D[recip]^D[...] OR -t
100 Each [recip] is output after "Message sent"; C<-t> represents the same
101 sendmail option, indicating that the recipients are taken from the headers
102 of the message itself.
110 [html] is copied unescaped to the output. The record immediately following
111 this one is considered "boring" and only shown in certain output modes.
113 (This is a design flaw in the log format, since it makes it difficult to
114 change the HTML presentation later, or to present the data in an entirely
119 No other types of records are permitted, and the file must end with a ^C
130 7 => 'incoming-recv',
133 =head2 Perl Record Representation
135 Each record is a hash. The C<type> field is C<incoming-recv>, C<autocheck>,
136 C<recips>, or C<html> as above; C<text> contains text from C<[mail]> or
137 C<[html]> as above; C<recips> is a reference to an array of recipients
138 (strings), or undef for C<-t>.
146 Creates a new log reader based on a .log filehandle.
148 my $log = Debbugs::Log->new($logfh);
149 my $log = Debbugs::Log->new(bug_num => $nnn);
150 my $log = Debbugs::Log->new(logfh => $logfh);
156 =item bug_num -- bug number
158 =item logfh -- log filehandle
160 =item log_name -- name of log
164 One of the above options must be passed.
173 ($param{logfh}) = @_;
174 $param{inner_file} = 0;
178 {bug_num => {type => SCALAR,
181 logfh => {type => HANDLE,
184 log_name => {type => SCALAR,
187 inner_file => {type => BOOLEAN,
191 %param = validate_with(params => \@_,
195 if (grep({exists $param{$_} and defined $param{$_}}
196 qw(bug_num logfh log_name)) ne 1) {
197 croak "Exactly one of bug_num, logfh, or log_name ".
198 "must be passed and must be defined";
201 my $class = ref($this) || $this;
205 if (exists $param{logfh}) {
206 $self->{logfh} = $param{logfh}
209 if (exists $param{bug_num}) {
210 my $location = getbuglocation($param{bug_num},'log');
211 $bug_log = getbugcomponent($param{bug_num},'log',$location);
213 $bug_log = $param{log_name};
215 if ($bug_log =~ m/\.gz$/) {
216 my $oldpath = $ENV{'PATH'};
217 $ENV{'PATH'} = '/bin:/usr/bin';
218 open($self->{logfh},'-|','gzip','-dc',$bug_log) or
219 die "Unable to open $bug_log for reading: $!";
220 $ENV{'PATH'} = $oldpath;
222 open($self->{logfh},'<',$bug_log) or
223 die "Unable to open $bug_log for reading: $!";
227 $self->{state} = 'kill-init';
228 $self->{linenum} = 0;
229 $self->{inner_file} = $param{inner_file};
235 Reads and returns a single record from a log reader object. At end of file,
236 returns undef. Throws exceptions using die(), so you may want to wrap this
244 my $logfh = $this->{logfh};
246 # This comes from bugreport.cgi, but is much simpler since it doesn't
247 # worry about the details of output.
251 while (defined (my $line = <$logfh>)) {
252 $record->{start} = $logfh->tell() if not defined $record->{start};
255 if (length($line) == 1 and exists $states{ord($line)}) {
257 my $newstate = $states{ord($line)};
259 # disallowed transitions
260 $_ = "$this->{state} $newstate";
261 unless (/^(go|go-nox|html) kill-end$/ or
262 /^(kill-init|kill-end) (incoming-recv|autocheck|recips|html)$/ or
264 die "transition from $this->{state} to $newstate at $this->{linenum} disallowed";
267 $this->{state} = $newstate;
268 if ($this->{state} =~ /^(autocheck|recips|html|incoming-recv)$/) {
269 $record->{type} = $this->{state};
270 $record->{start} = $logfh->tell;
271 $record->{stop} = $logfh->tell;
272 $record->{inner_file} = $this->{inner_file};
273 } elsif ($this->{state} eq 'kill-end') {
274 if ($this->{inner_file}) {
275 $record->{fh} = IO::InnerFile->new($logfh,$record->{start},$record->{stop} - $record->{start})
282 $record->{stop} = $logfh->tell;
284 if ($this->{state} eq 'incoming-recv') {
286 unless (/^Received: \(at \S+\) by \S+;/) {
287 die "bad line '$pl' in state incoming-recv";
289 $this->{state} = 'go';
290 $record->{text} .= "$_\n" unless $this->{inner_file};
291 } elsif ($this->{state} eq 'html') {
292 $record->{text} .= "$_\n" unless $this->{inner_file};
293 } elsif ($this->{state} eq 'go') {
295 $record->{text} .= "$_\n" unless $this->{inner_file};
296 } elsif ($this->{state} eq 'go-nox') {
297 $record->{text} .= "$_\n" unless $this->{inner_file};
298 } elsif ($this->{state} eq 'recips') {
300 undef $record->{recips};
302 # preserve trailing null fields, e.g. #2298
303 $record->{recips} = [split /\04/, $_, -1];
305 $this->{state} = 'kill-body';
306 $record->{start} = $logfh->tell+2;
307 $record->{stop} = $logfh->tell+2;
308 $record->{inner_file} = $this->{inner_file};
309 } elsif ($this->{state} eq 'autocheck') {
310 $record->{text} .= "$_\n" unless $this->{inner_file};
311 next if !/^X-Debian-Bugs(-\w+)?: This is an autoforward from (\S+)/;
312 $this->{state} = 'autowait';
313 } elsif ($this->{state} eq 'autowait') {
314 $record->{text} .= "$_\n" unless $this->{inner_file};
316 $this->{state} = 'go-nox';
318 die "state $this->{state} at line $this->{linenum} ('$_')";
321 die "state $this->{state} at end" unless $this->{state} eq 'kill-end';
330 =item read_log_records
332 Takes a .log filehandle as input, and returns an array of all records in
333 that file. Throws exceptions using die(), so you may want to wrap this in an
336 Uses exactly the same options as Debbugs::Log::new
344 ($param{logfh}) = @_;
347 %param = validate_with(params => \@_,
348 spec => {bug_num => {type => SCALAR,
351 logfh => {type => HANDLE,
354 log_name => {type => SCALAR,
357 inner_file => {type => BOOLEAN,
363 if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) ne 1) {
364 croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined";
368 my $reader = Debbugs::Log->new(%param);
369 while (defined(my $record = $reader->read_record())) {
370 push @records, $record;
375 =item write_log_records
377 Takes a filehandle and a list of records as input, and prints the .log
378 format representation of those records to that filehandle.
384 sub write_log_records
386 my %param = validate_with(params => \@_,
387 spec => {bug_num => {type => SCALAR,
390 logfh => {type => HANDLE,
393 log_name => {type => SCALAR,
396 records => {type => HASHREF|ARRAYREF,
400 if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) ne 1) {
401 croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined";
404 if (exists $param{logfh}) {
405 $logfh = $param{logfh}
407 elsif (exists $param{log_name}) {
408 $logfh = IO::File->new(">>$param{log_name}") or
409 die "Unable to open bug log $param{log_name} for writing: $!";
411 elsif (exists $param{bug_num}) {
412 my $location = getbuglocation($param{bug_num},'log');
413 my $bug_log = getbugcomponent($param{bug_num},'log',$location);
414 $logfh = IO::File->new($bug_log, 'r') or
415 die "Unable to open bug log $bug_log for reading: $!";
417 my @records = make_list($param{records});
419 for my $record (@records) {
420 my $type = $record->{type};
421 croak "record type '$type' with no text field" unless defined $record->{text};
422 # I am not sure if we really want to croak here; but this is
423 # almost certainly a bug if is_utf8 is on.
424 my $text = $record->{text};
425 if (is_utf8($text)) {
426 carp('Record text was in the wrong encoding (perl internal instead of utf8 octets)');
427 $text = encode_utf8($text)
429 ($text) = escape_log($text);
430 if ($type eq 'autocheck') {
431 print {$logfh} "\01\n$text\03\n" or
432 die "Unable to write to logfile: $!";
433 } elsif ($type eq 'recips') {
434 print {$logfh} "\02\n";
435 my $recips = $record->{recips};
436 if (defined $recips) {
437 croak "recips not undef or array"
438 unless ref($recips) eq 'ARRAY';
439 my $wrong_encoding = 0;
441 map { if (is_utf8($_)) {
447 carp('Recipients was in the wrong encoding (perl internal instead of utf8 octets') if $wrong_encoding;
448 print {$logfh} join("\04", @$recips) . "\n" or
449 die "Unable to write to logfile: $!";
451 print {$logfh} "-t\n" or
452 die "Unable to write to logfile: $!";
454 #$text =~ s/^([\01-\07\030])/\030$1/gm;
455 print {$logfh} "\05\n$text\03\n" or
456 die "Unable to write to logfile: $!";
457 } elsif ($type eq 'html') {
458 print {$logfh} "\06\n$text\03\n" or
459 die "Unable to write to logfile: $!";
460 } elsif ($type eq 'incoming-recv') {
461 #$text =~ s/^([\01-\07\030])/\030$1/gm;
462 print {$logfh} "\07\n$text\03\n" or
463 die "Unable to write to logfile: $!";
465 croak "unknown record type type '$type'";
474 print {$log} escape_log(@log)
476 Applies the log escape regex to the passed logfile.
482 return map {s/^([\01-\07\030])/\030$1/gm; $_ } @log;
488 if ($record->{inner_file}) {
491 my $t = $record->{fh};
493 $record->{fh}->seek(0,0);
496 return $record->{text};
501 my ($record,$regex) = @_;
502 if ($record->{inner_file}) {
504 my $fh = $record->{fh};
506 if (@result = $_ =~ m/$regex/) {
507 $record->{fh}->seek(0,0);
511 $record->{fh}->seek(0,0);
514 my @result = $record->{text} =~ m/$regex/;
522 This module does none of the formatting that bugreport.cgi et al do. It's
523 simply a means for extracting and rewriting raw records.
530 # indent-tabs-mode: nil
531 # cperl-indent-level: 4