1 # This module is part of debbugs, and is released
2 # under the terms of the GPL version 2, or any later
3 # version at your option.
4 # See the file README and COPYING for more information.
6 # [Other people have contributed to this file; their copyrights should
8 # Copyright 2004 by Colin Watson <cjwatson@debian.org>
9 # Copyright 2007 by Don Armstrong <don@donarmstrong.com>
18 use vars qw($VERSION $DEBUG @EXPORT @EXPORT_OK %EXPORT_TAGS);
19 use base qw(Exporter);
23 $DEBUG = 0 unless defined $DEBUG;
26 %EXPORT_TAGS = (write => [qw(write_log_records),
28 read => [qw(read_log_records),
30 misc => [qw(escape_log),
34 Exporter::export_ok_tags(qw(write read misc));
35 $EXPORT_TAGS{all} = [@EXPORT_OK];
40 use Debbugs::Common qw(getbuglocation getbugcomponent make_list);
41 use Params::Validate qw(:types validate_with);
42 use Encode qw(encode is_utf8);
46 Debbugs::Log - an interface to debbugs .log files
50 The Debbugs::Log module provides a convenient way for scripts to read and
51 write the .log files used by debbugs to store the complete textual records
52 of all bug transactions.
54 =head2 The .log File Format
56 .log files consist of a sequence of records, of one of the following four
57 types. ^A, ^B, etc. represent those control characters.
67 C<[mail]> must start with /^Received: \(at \S+\) by \S+;/, and is copied to
72 Auto-forwarded messages are recorded like this:
78 C<[mail]> must contain /^X-Debian-Bugs(-\w+)?: This is an autoforward from
79 \S+/. The first line matching that is removed; all lines in the message body
80 that begin with 'X' will be copied to the output, minus the 'X'.
82 Nothing in debbugs actually generates this record type any more, but it may
83 still be in old .logs at some sites.
88 [recip]^D[recip]^D[...] OR -t
93 Each [recip] is output after "Message sent"; C<-t> represents the same
94 sendmail option, indicating that the recipients are taken from the headers
95 of the message itself.
103 [html] is copied unescaped to the output. The record immediately following
104 this one is considered "boring" and only shown in certain output modes.
106 (This is a design flaw in the log format, since it makes it difficult to
107 change the HTML presentation later, or to present the data in an entirely
112 No other types of records are permitted, and the file must end with a ^C
123 7 => 'incoming-recv',
126 =head2 Perl Record Representation
128 Each record is a hash. The C<type> field is C<incoming-recv>, C<autocheck>,
129 C<recips>, or C<html> as above; C<text> contains text from C<[mail]> or
130 C<[html]> as above; C<recips> is a reference to an array of recipients
131 (strings), or undef for C<-t>.
139 Creates a new log reader based on a .log filehandle, a log file name, or a bug number.
141 my $log = Debbugs::Log->new($logfh);
142 my $log = Debbugs::Log->new(bug_num => $nnn);
143 my $log = Debbugs::Log->new(logfh => $logfh);
149 =item bug_num -- bug number
151 =item logfh -- log filehandle
153 =item log_name -- name of log
157 Exactly one of the above options must be passed, and it must be defined.
166 ($param{logfh}) = @_;
169 %param = validate_with(params => \@_,
170 spec => {bug_num => {type => SCALAR,
173 logfh => {type => HANDLE,
176 log_name => {type => SCALAR,
182 if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) ne 1) {
183 croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined";
186 my $class = ref($this) || $this;
190 if (exists $param{logfh}) {
191 $self->{logfh} = $param{logfh}
193 elsif (exists $param{log_name}) {
194 $self->{logfh} = IO::File->new($param{log_name},'r') or
195 die "Unable to open bug log $param{log_name} for reading: $!";
197 elsif (exists $param{bug_num}) {
198 my $location = getbuglocation($param{bug_num},'log');
199 my $bug_log = getbugcomponent($param{bug_num},'log',$location);
200 $self->{logfh} = IO::File->new($bug_log, 'r') or
201 die "Unable to open bug log $bug_log for reading: $!";
204 binmode($self->{logfh},':utf8');
205 $self->{state} = 'kill-init';
206 $self->{linenum} = 0;
212 Reads and returns a single record from a log reader object. At end of file,
213 returns undef. Throws exceptions using die(), so you may want to wrap this
221 my $logfh = $this->{logfh};
223 # This comes from bugreport.cgi, but is much simpler since it doesn't
224 # worry about the details of output.
228 while (defined (my $line = <$logfh>)) {
231 if (length($line) == 1 and exists $states{ord($line)}) {
233 my $newstate = $states{ord($line)};
235 # disallowed transitions
236 $_ = "$this->{state} $newstate";
237 unless (/^(go|go-nox|html) kill-end$/ or
238 /^(kill-init|kill-end) (incoming-recv|autocheck|recips|html)$/ or
240 die "transition from $this->{state} to $newstate at $this->{linenum} disallowed";
243 $this->{state} = $newstate;
245 if ($this->{state} =~ /^(autocheck|recips|html|incoming-recv)$/) {
246 $record->{type} = $this->{state};
247 } elsif ($this->{state} eq 'kill-end') {
255 if ($this->{state} eq 'incoming-recv') {
257 unless (/^Received: \(at \S+\) by \S+;/) {
258 die "bad line '$pl' in state incoming-recv";
260 $this->{state} = 'go';
261 $record->{text} .= "$_\n";
262 } elsif ($this->{state} eq 'html') {
263 $record->{text} .= "$_\n";
264 } elsif ($this->{state} eq 'go') {
266 $record->{text} .= "$_\n";
267 } elsif ($this->{state} eq 'go-nox') {
268 $record->{text} .= "$_\n";
269 } elsif ($this->{state} eq 'recips') {
271 undef $record->{recips};
273 # preserve trailing null fields, e.g. #2298
274 $record->{recips} = [split /\04/, $_, -1];
276 $this->{state} = 'kill-body';
277 } elsif ($this->{state} eq 'autocheck') {
278 $record->{text} .= "$_\n";
279 next if !/^X-Debian-Bugs(-\w+)?: This is an autoforward from (\S+)/;
280 $this->{state} = 'autowait';
281 } elsif ($this->{state} eq 'autowait') {
282 $record->{text} .= "$_\n";
284 $this->{state} = 'go-nox';
286 die "state $this->{state} at line $this->{linenum} ('$_')";
289 die "state $this->{state} at end" unless $this->{state} eq 'kill-end';
298 =item read_log_records
300 Takes a .log filehandle as input, and returns an array of all records in
301 that file. Throws exceptions using die(), so you may want to wrap this in an
304 Uses exactly the same options as Debbugs::Log::new
312 ($param{logfh}) = @_;
315 %param = validate_with(params => \@_,
316 spec => {bug_num => {type => SCALAR,
319 logfh => {type => HANDLE,
322 log_name => {type => SCALAR,
328 if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) ne 1) {
329 croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined";
333 my $reader = Debbugs::Log->new(%param);
334 while (defined(my $record = $reader->read_record())) {
335 push @records, $record;
340 =item write_log_records
342 Takes a filehandle (or a log name or bug number) and a list of records as input, and prints the .log
343 format representation of those records to that filehandle.
# write_log_records(%param)
#
# Writes records, in the debbugs .log format, to a bug log.
#
# Named parameters:
#   bug_num  -- bug number; the log path is looked up with getbuglocation()
#               and getbugcomponent(), and the file is opened for appending
#   logfh    -- an already-open filehandle to print to (left open on return)
#   log_name -- path of a log file, opened for appending
#   records  -- a record hashref or an arrayref of record hashrefs
#               (turned into a list with make_list())
#
# Exactly one of bug_num, logfh and log_name must be passed and defined.
#
# Each record needs a 'type' of autocheck, recips, html or incoming-recv and
# a defined 'text'.  A recips record may also carry 'recips': an arrayref of
# recipients, or undef to write "-t".
#
# Croaks on bad arguments or malformed records; dies if the log cannot be
# opened, written or (when opened here) closed.  Returns 1 on success.
sub write_log_records
{
    my %param = validate_with(params => \@_,
                              spec   => {bug_num  => {type     => SCALAR,
                                                      optional => 1,
                                                     },
                                         logfh    => {type     => HANDLE,
                                                      optional => 1,
                                                     },
                                         log_name => {type     => SCALAR,
                                                      optional => 1,
                                                     },
                                         records  => {type => HASHREF|ARRAYREF,
                                                     },
                                        },
                             );
    # grep in scalar context yields a count, so compare it numerically.
    if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) != 1) {
        croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined";
    }

    my $logfh;
    # Only handles opened by this sub are closed by it; a caller-supplied
    # logfh stays open.
    my $opened_here = 0;
    if (exists $param{logfh}) {
        $logfh = $param{logfh};
    }
    elsif (exists $param{log_name}) {
        # Pass the mode separately so that characters in the file name can
        # never be interpreted as part of the open mode.
        $logfh = IO::File->new($param{log_name}, '>>') or
            die "Unable to open bug log $param{log_name} for writing: $!";
        $opened_here = 1;
    }
    elsif (exists $param{bug_num}) {
        my $location = getbuglocation($param{bug_num},'log');
        my $bug_log = getbugcomponent($param{bug_num},'log',$location);
        # The log is written to below, so it has to be opened for appending
        # (as in the log_name case), not for reading.
        $logfh = IO::File->new($bug_log, '>>') or
            die "Unable to open bug log $bug_log for writing: $!";
        $opened_here = 1;
    }

    my @records = make_list($param{records});

    for my $record (@records) {
        my $type = $record->{type};
        croak "record with no type field" unless defined $type;
        croak "record type '$type' with no text field" unless defined $record->{text};
        # escape_log() takes care of escaping control characters at the
        # start of lines, so $text can be written out as-is.
        my ($text) = escape_log($record->{text});
        if ($type eq 'autocheck') {
            print {$logfh} "\01\n$text\03\n" or
                die "Unable to write to logfile: $!";
        }
        elsif ($type eq 'recips') {
            my $recips = $record->{recips};
            croak "recips not undef or array"
                if defined $recips and ref($recips) ne 'ARRAY';
            # undef recips stands for sendmail's -t (recipients taken from
            # the message headers).
            my $recip_line = defined $recips ? join("\04", @$recips) : '-t';
            print {$logfh} "\02\n" or
                die "Unable to write to logfile: $!";
            print {$logfh} "$recip_line\n" or
                die "Unable to write to logfile: $!";
            print {$logfh} "\05\n$text\03\n" or
                die "Unable to write to logfile: $!";
        }
        elsif ($type eq 'html') {
            print {$logfh} "\06\n$text\03\n" or
                die "Unable to write to logfile: $!";
        }
        elsif ($type eq 'incoming-recv') {
            print {$logfh} "\07\n$text\03\n" or
                die "Unable to write to logfile: $!";
        }
        else {
            croak "unknown record type '$type'";
        }
    }

    # Buffered write errors only show up at close time, so check it for the
    # handles we own.
    if ($opened_here) {
        $logfh->close or
            die "Unable to close logfile: $!";
    }

    return 1;
}
423 print {$log} escape_log(@log)
425 Applies the log escape regex to each of the passed strings and returns the escaped copies; strings flagged as UTF-8 are encoded to UTF-8 bytes first.
431 return map { eval {$_ = is_utf8($_)?encode("utf8",$_,Encode::FB_CROAK):$_;}; s/^([\01-\07\030])/\030$1/gm; $_ } @log;
437 This module does none of the formatting that bugreport.cgi et al do. It's
438 simply a means for extracting and rewriting raw records.