1 # This module is part of debbugs, and is released
2 # under the terms of the GPL version 2, or any later
3 # version at your option.
4 # See the file README and COPYING for more information.
6 # [Other people have contributed to this file; their copyrights should
8 # Copyright 2004 by Collin Watson <cjwatson@debian.org>
9 # Copyright 2007 by Don Armstrong <don@donarmstrong.com>
17 use v5.10; # for state
19 use vars qw($VERSION $DEBUG @EXPORT @EXPORT_OK %EXPORT_TAGS);
20 use Exporter qw(import);
24 $DEBUG = 0 unless defined $DEBUG;
27 %EXPORT_TAGS = (write => [qw(write_log_records),
29 read => [qw(read_log_records record_text record_regex),
31 misc => [qw(escape_log),
35 Exporter::export_ok_tags(qw(write read misc));
36 $EXPORT_TAGS{all} = [@EXPORT_OK];
41 use Debbugs::Common qw(getbuglocation getbugcomponent make_list);
42 use Params::Validate qw(:types validate_with);
43 use Encode qw(encode encode_utf8 is_utf8);
48 Debbugs::Log - an interface to debbugs .log files
52 The Debbugs::Log module provides a convenient way for scripts to read and
53 write the .log files used by debbugs to store the complete textual records
54 of all bug transactions.
56 Debbugs::Log does not decode utf8 into perl's internal encoding or
57 encode into utf8 from perl's internal encoding. For html records and
58 all recips, this should probably be done. For other records, this should
61 =head2 The .log File Format
63 .log files consist of a sequence of records, of one of the following four
64 types. ^A, ^B, etc. represent those control characters.
74 C<[mail]> must start with /^Received: \(at \S+\) by \S+;/, and is copied to
79 Auto-forwarded messages are recorded like this:
85 C<[mail]> must contain /^X-Debian-Bugs(-\w+)?: This is an autoforward from
86 \S+/. The first line matching that is removed; all lines in the message body
87 that begin with 'X' will be copied to the output, minus the 'X'.
89 Nothing in debbugs actually generates this record type any more, but it may
90 still be in old .logs at some sites.
95 [recip]^D[recip]^D[...] OR -t
100 Each [recip] is output after "Message sent"; C<-t> represents the same
101 sendmail option, indicating that the recipients are taken from the headers
102 of the message itself.
110 [html] is copied unescaped to the output. The record immediately following
111 this one is considered "boring" and only shown in certain output modes.
113 (This is a design flaw in the log format, since it makes it difficult to
114 change the HTML presentation later, or to present the data in an entirely
119 No other types of records are permitted, and the file must end with a ^C
130 7 => 'incoming-recv',
133 =head2 Perl Record Representation
135 Each record is a hash. The C<type> field is C<incoming-recv>, C<autocheck>,
136 C<recips>, or C<html> as above; C<text> contains text from C<[mail]> or
137 C<[html]> as above; C<recips> is a reference to an array of recipients
138 (strings), or undef for C<-t>.
146 Creates a new log reader based on a .log filehandle.
148 my $log = Debbugs::Log->new($logfh);
149 my $log = Debbugs::Log->new(bug_num => $nnn);
150 my $log = Debbugs::Log->new(logfh => $logfh);
156 =item bug_num -- bug number
158 =item logfh -- log filehandle
160 =item log_name -- name of log
164 One of the above options must be passed.
169 my ($self,$args) = @_;
170 if (not ($self->_has_bug_num or
172 $self->_has_log_name)) {
173 croak "Exactly one of bug_num, logfh, or log_name ".
174 "must be passed and must be defined";
181 predicate => '_has_bug_num',
187 builder => '_build_logfh',
188 predicate => '_has_logfh',
196 if ($bug_log =~ m/\.gz$/) {
197 my $oldpath = $ENV{'PATH'};
198 $ENV{'PATH'} = '/bin:/usr/bin';
199 open($log_fh,'-|','gzip','-dc',$bug_log) or
200 die "Unable to open $bug_log for reading: $!";
201 $ENV{'PATH'} = $oldpath;
203 open($log_fh,'<',$bug_log) or
204 die "Unable to open $bug_log for reading: $!";
213 builder => '_build_log_name',
214 predicate => '_has_log_name',
217 sub _build_log_name {
219 my $location = getbuglocation($self->bug_num,'log');
220 return getbugcomponent($self->bug_num,'log',$location);
232 default => 'kill-init',
236 sub state_transition {
238 my $new_state = shift;
239 my $old_state = $self->state;
240 local $_ = "$old_state $new_state";
241 unless (/^(go|go-nox|html) kill-end$/ or
242 /^(kill-init|kill-end) (incoming-recv|autocheck|recips|html)$/ or
243 /^autocheck autowait$/ or
244 /^autowait go-nox$/ or
245 /^recips kill-body$/ or
246 /^(kill-body|incoming-recv) go$/) {
247 confess "transition from $old_state to $new_state at $self->linenum disallowed";
249 $self->_state($new_state);
252 sub increment_linenum {
254 $self->_linenum($self->_linenum+1);
264 Reads and returns a single record from a log reader object. At end of file,
265 returns undef. Throws exceptions using die(), so you may want to wrap this
273 my $logfh = $this->logfh;
275 # This comes from bugreport.cgi, but is much simpler since it doesn't
276 # worry about the details of output.
280 while (defined (my $line = <$logfh>)) {
281 $record->{start} = $logfh->tell() if not defined $record->{start};
283 $this->increment_linenum;
284 if (length($line) == 1 and exists $states{ord($line)}) {
286 $this->state_transition($states{ord($line)});
287 if ($this->state =~ /^(autocheck|recips|html|incoming-recv)$/) {
288 $record->{type} = $this->state;
289 $record->{start} = $logfh->tell;
290 $record->{stop} = $logfh->tell;
291 $record->{inner_file} = $this->inner_file;
292 } elsif ($this->state eq 'kill-end') {
293 if ($this->inner_file) {
295 IO::InnerFile->new($logfh,$record->{start},
296 $record->{stop} - $record->{start})
303 $record->{stop} = $logfh->tell;
305 if ($this->state eq 'incoming-recv') {
307 unless (/^Received: \(at \S+\) by \S+;/) {
308 die "bad line '$pl' in state incoming-recv";
310 $this->state_transition('go');
311 $record->{text} .= "$_\n" unless $this->inner_file;
312 } elsif ($this->state eq 'html') {
313 $record->{text} .= "$_\n" unless $this->inner_file;
314 } elsif ($this->state eq 'go') {
316 $record->{text} .= "$_\n" unless $this->inner_file;
317 } elsif ($this->state eq 'go-nox') {
318 $record->{text} .= "$_\n" unless $this->inner_file;
319 } elsif ($this->state eq 'recips') {
321 undef $record->{recips};
323 # preserve trailing null fields, e.g. #2298
324 $record->{recips} = [split /\04/, $_, -1];
326 $this->state_transition('kill-body');
327 $record->{start} = $logfh->tell+2;
328 $record->{stop} = $logfh->tell+2;
329 $record->{inner_file} = $this->inner_file;
330 } elsif ($this->state eq 'autocheck') {
331 $record->{text} .= "$_\n" unless $this->inner_file;
332 next if !/^X-Debian-Bugs(-\w+)?: This is an autoforward from (\S+)/;
333 $this->state_transition('autowait');
334 } elsif ($this->state eq 'autowait') {
335 $record->{text} .= "$_\n" unless $this->inner_file;
337 $this->state_transition('go-nox');
339 die "state $this->state at line $this->linenum ('$_')";
342 die "state $this->state at end" unless $this->state eq 'kill-end';
353 Rewinds the Debbugs::Log to the beginning
359 if ($self->_has_log_name) {
360 $self->_clear_log_fh;
362 $self->log_fh->seek(0);
364 $self->_state('kill-init');
368 =item read_all_records
370 Reads all of the Debbugs::Records
374 sub read_all_records {
376 if ($self->_linenum != 0) {
380 while (defined(my $record = $self->read_record())) {
381 push @records, $record;
387 =item read_log_records
389 Takes a .log filehandle as input, and returns an array of all records in
390 that file. Throws exceptions using die(), so you may want to wrap this in an
393 Uses exactly the same options as Debbugs::Log::new
401 ($param{logfh}) = @_;
404 %param = validate_with(params => \@_,
405 spec => {bug_num => {type => SCALAR,
408 logfh => {type => HANDLE,
411 log_name => {type => SCALAR,
414 inner_file => {type => BOOLEAN,
420 if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) ne 1) {
421 croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined";
425 my $reader = Debbugs::Log->new(%param);
426 while (defined(my $record = $reader->read_record())) {
427 push @records, $record;
432 =item write_log_records
434 Takes a filehandle and a list of records as input, and prints the .log
435 format representation of those records to that filehandle.
441 sub write_log_records
443 my %param = validate_with(params => \@_,
444 spec => {bug_num => {type => SCALAR,
447 logfh => {type => HANDLE,
450 log_name => {type => SCALAR,
453 records => {type => HASHREF|ARRAYREF,
457 if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) ne 1) {
458 croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined";
461 if (exists $param{logfh}) {
462 $logfh = $param{logfh}
464 elsif (exists $param{log_name}) {
465 $logfh = IO::File->new(">>$param{log_name}") or
466 die "Unable to open bug log $param{log_name} for writing: $!";
468 elsif (exists $param{bug_num}) {
469 my $location = getbuglocation($param{bug_num},'log');
470 my $bug_log = getbugcomponent($param{bug_num},'log',$location);
471 $logfh = IO::File->new($bug_log, 'r') or
472 die "Unable to open bug log $bug_log for reading: $!";
474 my @records = make_list($param{records});
476 for my $record (@records) {
477 my $type = $record->{type};
478 croak "record type '$type' with no text field" unless defined $record->{text};
479 # I am not sure if we really want to croak here; but this is
480 # almost certainly a bug if is_utf8 is on.
481 my $text = $record->{text};
482 if (is_utf8($text)) {
483 carp('Record text was in the wrong encoding (perl internal instead of utf8 octets)');
484 $text = encode_utf8($text)
486 ($text) = escape_log($text);
487 if ($type eq 'autocheck') {
488 print {$logfh} "\01\n$text\03\n" or
489 die "Unable to write to logfile: $!";
490 } elsif ($type eq 'recips') {
491 print {$logfh} "\02\n";
492 my $recips = $record->{recips};
493 if (defined $recips) {
494 croak "recips not undef or array"
495 unless ref($recips) eq 'ARRAY';
496 my $wrong_encoding = 0;
498 map { if (is_utf8($_)) {
504 carp('Recipients was in the wrong encoding (perl internal instead of utf8 octets') if $wrong_encoding;
505 print {$logfh} join("\04", @$recips) . "\n" or
506 die "Unable to write to logfile: $!";
508 print {$logfh} "-t\n" or
509 die "Unable to write to logfile: $!";
511 #$text =~ s/^([\01-\07\030])/\030$1/gm;
512 print {$logfh} "\05\n$text\03\n" or
513 die "Unable to write to logfile: $!";
514 } elsif ($type eq 'html') {
515 print {$logfh} "\06\n$text\03\n" or
516 die "Unable to write to logfile: $!";
517 } elsif ($type eq 'incoming-recv') {
518 #$text =~ s/^([\01-\07\030])/\030$1/gm;
519 print {$logfh} "\07\n$text\03\n" or
520 die "Unable to write to logfile: $!";
522 croak "unknown record type type '$type'";
531 print {$log} escape_log(@log)
533 Applies the log escape regex to the passed logfile.
539 return map {s/^([\01-\07\030])/\030$1/gm; $_ } @log;
545 if ($record->{inner_file}) {
548 my $t = $record->{fh};
550 $record->{fh}->seek(0,0);
553 return $record->{text};
558 my ($record,$regex) = @_;
559 if ($record->{inner_file}) {
561 my $fh = $record->{fh};
563 if (@result = $_ =~ m/$regex/) {
564 $record->{fh}->seek(0,0);
568 $record->{fh}->seek(0,0);
571 my @result = $record->{text} =~ m/$regex/;
579 This module does none of the formatting that bugreport.cgi et al do. It's
580 simply a means for extracting and rewriting raw records.
587 # indent-tabs-mode: nil
588 # cperl-indent-level: 4