1 # This module is part of debbugs, and is released
2 # under the terms of the GPL version 2, or any later
3 # version at your option.
4 # See the file README and COPYING for more information.
6 # [Other people have contributed to this file; their copyrights should
8 # Copyright 2004 by Collin Watson <cjwatson@debian.org>
9 # Copyright 2007 by Don Armstrong <don@donarmstrong.com>
17 use v5.10; # for state
19 use vars qw($VERSION $DEBUG @EXPORT @EXPORT_OK %EXPORT_TAGS);
20 use Exporter qw(import);
24 $DEBUG = 0 unless defined $DEBUG;
27 %EXPORT_TAGS = (write => [qw(write_log_records),
29 read => [qw(read_log_records record_text record_regex),
31 misc => [qw(escape_log),
35 Exporter::export_ok_tags(qw(write read misc));
36 $EXPORT_TAGS{all} = [@EXPORT_OK];
41 use Debbugs::Common qw(getbuglocation getbugcomponent make_list);
42 use Params::Validate qw(:types validate_with);
43 use Encode qw(encode encode_utf8 is_utf8);
45 use Debbugs::Log::Record;
49 Debbugs::Log - an interface to debbugs .log files
53 The Debbugs::Log module provides a convenient way for scripts to read and
54 write the .log files used by debbugs to store the complete textual records
55 of all bug transactions.
57 Debbugs::Log does not decode utf8 into perl's internal encoding or
58 encode into utf8 from perl's internal encoding. For html records and
59 all recips, this should probably be done. For other records, this should
62 =head2 The .log File Format
64 .log files consist of a sequence of records, of one of the following four
65 types. ^A, ^B, etc. represent those control characters.
75 C<[mail]> must start with /^Received: \(at \S+\) by \S+;/, and is copied to
80 Auto-forwarded messages are recorded like this:
86 C<[mail]> must contain /^X-Debian-Bugs(-\w+)?: This is an autoforward from
87 \S+/. The first line matching that is removed; all lines in the message body
88 that begin with 'X' will be copied to the output, minus the 'X'.
90 Nothing in debbugs actually generates this record type any more, but it may
91 still be in old .logs at some sites.
96 [recip]^D[recip]^D[...] OR -t
101 Each [recip] is output after "Message sent"; C<-t> represents the same
102 sendmail option, indicating that the recipients are taken from the headers
103 of the message itself.
111 [html] is copied unescaped to the output. The record immediately following
112 this one is considered "boring" and only shown in certain output modes.
114 (This is a design flaw in the log format, since it makes it difficult to
115 change the HTML presentation later, or to present the data in an entirely
120 No other types of records are permitted, and the file must end with a ^C
131 7 => 'incoming-recv',
134 =head2 Perl Record Representation
136 Each record is a hash. The C<type> field is C<incoming-recv>, C<autocheck>,
137 C<recips>, or C<html> as above; C<text> contains text from C<[mail]> or
138 C<[html]> as above; C<recips> is a reference to an array of recipients
139 (strings), or undef for C<-t>.
147 Creates a new log reader based on a .log filehandle.
149 my $log = Debbugs::Log->new($logfh);
150 my $log = Debbugs::Log->new(bug_num => $nnn);
151 my $log = Debbugs::Log->new(logfh => $logfh);
157 =item bug_num -- bug number
159 =item logfh -- log filehandle
161 =item log_name -- name of log
165 One of the above options must be passed.
170 my ($self,$args) = @_;
171 if (not ($self->_has_bug_num or
173 $self->_has_log_name)) {
174 croak "Exactly one of bug_num, logfh, or log_name ".
175 "must be passed and must be defined";
182 predicate => '_has_bug_num',
188 builder => '_build_logfh',
189 predicate => '_has_logfh',
197 if ($bug_log =~ m/\.gz$/) {
198 my $oldpath = $ENV{'PATH'};
199 $ENV{'PATH'} = '/bin:/usr/bin';
200 open($log_fh,'-|','gzip','-dc',$bug_log) or
201 die "Unable to open $bug_log for reading: $!";
202 $ENV{'PATH'} = $oldpath;
204 open($log_fh,'<',$bug_log) or
205 die "Unable to open $bug_log for reading: $!";
214 builder => '_build_log_name',
215 predicate => '_has_log_name',
218 sub _build_log_name {
220 my $location = getbuglocation($self->bug_num,'log');
221 return getbugcomponent($self->bug_num,'log',$location);
233 default => 'kill-init',
237 sub state_transition {
239 my $new_state = shift;
240 my $old_state = $self->state;
241 local $_ = "$old_state $new_state";
242 unless (/^(go|go-nox|html) kill-end$/ or
243 /^(kill-init|kill-end) (incoming-recv|autocheck|recips|html)$/ or
244 /^autocheck autowait$/ or
245 /^autowait go-nox$/ or
246 /^recips kill-body$/ or
247 /^(kill-body|incoming-recv) go$/) {
248 confess "transition from $old_state to $new_state at $self->linenum disallowed";
250 $self->_state($new_state);
253 sub increment_linenum {
255 $self->_linenum($self->_linenum+1);
273 if (not defined ref($_[0])) {
274 @records = [@records];
276 for my $record (@records) {
277 if (not isa($record,'Debbugs::Log::Record')) {
278 if (ref($record) ne 'ARRAY') {
279 croak("Debbugs::Log->write must be passed either a Debbugs::Log::Record or an ARRAYREF");
281 $record = Debbugs::Log::Record->new(@{$record});
289 Reads and returns a single record from a log reader object. At end of file,
290 returns undef. Throws exceptions using die(), so you may want to wrap this
298 my $logfh = $this->logfh;
300 # This comes from bugreport.cgi, but is much simpler since it doesn't
301 # worry about the details of output.
304 while (defined (my $line = <$logfh>)) {
305 if (not defined $record) {
307 Debbugs::Log::Record->new(log_fh => $logfh,
308 start => $logfh->tell()
312 $this->increment_linenum;
313 $record->stop($logfh->tell);
314 if (length($line) == 1 and exists $states{ord($line)}) {
316 $this->state_transition($states{ord($line)});
317 if ($this->state =~ /^(autocheck|recips|html|incoming-recv)$/) {
318 $record->type($this->state);
319 } elsif ($this->state eq 'kill-end') {
325 if ($this->state eq 'incoming-recv') {
327 unless (/^Received: \(at \S+\) by \S+;/) {
328 die "bad line '$pl' in state incoming-recv";
330 $this->state_transition('go');
331 $record->add_text($_."\n");
332 } elsif ($this->state eq 'html') {
333 $record->add_text($_."\n");
334 } elsif ($this->state eq 'go') {
336 $record->add_text($_."\n");
337 } elsif ($this->state eq 'go-nox') {
338 $record->add_text($_."\n");
339 } elsif ($this->state eq 'recips') {
341 $record->recipients([]);
343 # preserve trailing null fields, e.g. #2298
344 $record->recipients([split /\04/, $_, -1]);
346 $this->state_transition('kill-body');
347 $record->start($logfh->tell+2);
348 $record->stop($logfh->tell+2);
349 } elsif ($this->state eq 'autocheck') {
350 $record->add_text($_."\n");
351 next if !/^X-Debian-Bugs(-\w+)?: This is an autoforward from (\S+)/;
352 $this->state_transition('autowait');
353 } elsif ($this->state eq 'autowait') {
354 $record->add_text($_."\n");
356 $this->state_transition('go-nox');
358 die "state $this->state at line $this->linenum ('$_')";
361 die "state $this->state at end" unless $this->state eq 'kill-end';
368 Rewinds the Debbugs::Log to the beginning
374 if ($self->_has_log_name) {
375 $self->_clear_log_fh;
377 $self->log_fh->seek(0);
379 $self->_state('kill-init');
383 =item read_all_records
385 Reads all of the Debbugs::Records
389 sub read_all_records {
391 if ($self->_linenum != 0) {
395 while (defined(my $record = $self->read_record())) {
396 push @records, $record;
402 =item read_log_records
404 Takes a .log filehandle as input, and returns an array of all records in
405 that file. Throws exceptions using die(), so you may want to wrap this in an
408 Uses exactly the same options as Debbugs::Log::new
416 ($param{logfh}) = @_;
419 %param = validate_with(params => \@_,
420 spec => {bug_num => {type => SCALAR,
423 logfh => {type => HANDLE,
426 log_name => {type => SCALAR,
429 inner_file => {type => BOOLEAN,
435 if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) ne 1) {
436 croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined";
440 my $reader = Debbugs::Log->new(%param);
441 while (defined(my $record = $reader->read_record())) {
442 push @records, $record;
447 =item write_log_records
449 Takes a filehandle and a list of records as input, and prints the .log
450 format representation of those records to that filehandle.
456 sub write_log_records
458 my %param = validate_with(params => \@_,
459 spec => {bug_num => {type => SCALAR,
462 logfh => {type => HANDLE,
465 log_name => {type => SCALAR,
468 records => {type => HASHREF|ARRAYREF,
472 if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) ne 1) {
473 croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined";
476 if (exists $param{logfh}) {
477 $logfh = $param{logfh}
479 elsif (exists $param{log_name}) {
480 $logfh = IO::File->new(">>$param{log_name}") or
481 die "Unable to open bug log $param{log_name} for writing: $!";
483 elsif (exists $param{bug_num}) {
484 my $location = getbuglocation($param{bug_num},'log');
485 my $bug_log = getbugcomponent($param{bug_num},'log',$location);
486 $logfh = IO::File->new($bug_log, 'r') or
487 die "Unable to open bug log $bug_log for reading: $!";
489 my @records = make_list($param{records});
491 for my $record (@records) {
492 my $type = $record->type;
493 croak "record type '$type' with no text field" unless defined $record->text;
494 # I am not sure if we really want to croak here; but this is
495 # almost certainly a bug if is_utf8 is on.
496 my $text = $record->text;
497 if (is_utf8($text)) {
498 carp('Record text was in the wrong encoding (perl internal instead of utf8 octets)');
499 $text = encode_utf8($text)
501 ($text) = escape_log($text);
502 if ($type eq 'autocheck') {
503 print {$logfh} "\01\n$text\03\n" or
504 die "Unable to write to logfile: $!";
505 } elsif ($type eq 'recips') {
506 print {$logfh} "\02\n";
507 my $recips = $record->recipients;
508 if (defined $recips) {
509 croak "recips not undef or array"
510 unless ref($recips) eq 'ARRAY';
511 my $wrong_encoding = 0;
513 map { if (is_utf8($_)) {
519 carp('Recipients was in the wrong encoding (perl internal instead of utf8 octets') if $wrong_encoding;
520 print {$logfh} join("\04", @$recips) . "\n" or
521 die "Unable to write to logfile: $!";
523 print {$logfh} "-t\n" or
524 die "Unable to write to logfile: $!";
526 #$text =~ s/^([\01-\07\030])/\030$1/gm;
527 print {$logfh} "\05\n$text\03\n" or
528 die "Unable to write to logfile: $!";
529 } elsif ($type eq 'html') {
530 print {$logfh} "\06\n$text\03\n" or
531 die "Unable to write to logfile: $!";
532 } elsif ($type eq 'incoming-recv') {
533 #$text =~ s/^([\01-\07\030])/\030$1/gm;
534 print {$logfh} "\07\n$text\03\n" or
535 die "Unable to write to logfile: $!";
537 croak "unknown record type type '$type'";
546 print {$log} escape_log(@log)
548 Applies the log escape regex to the passed logfile.
554 return map {s/^([\01-\07\030])/\030$1/gm; $_ } @log;
560 if ($record->{inner_file}) {
563 my $t = $record->{fh};
565 $record->{fh}->seek(0,0);
568 return $record->{text};
573 my ($record,$regex) = @_;
574 if ($record->{inner_file}) {
576 my $fh = $record->{fh};
578 if (@result = $_ =~ m/$regex/) {
579 $record->{fh}->seek(0,0);
583 $record->{fh}->seek(0,0);
586 my @result = $record->{text} =~ m/$regex/;
594 This module does none of the formatting that bugreport.cgi et al do. It's
595 simply a means for extracting and rewriting raw records.
602 # indent-tabs-mode: nil
603 # cperl-indent-level: 4