1 # This module is part of debbugs, and is released
2 # under the terms of the GPL version 2, or any later
3 # version at your option.
4 # See the file README and COPYING for more information.
6 # [Other people have contributed to this file; their copyrights should
8 # Copyright 2004 by Colin Watson <cjwatson@debian.org>
9 # Copyright 2007 by Don Armstrong <don@donarmstrong.com>
18 use vars qw($VERSION $DEBUG @EXPORT @EXPORT_OK %EXPORT_TAGS);
19 use base qw(Exporter);
23 $DEBUG = 0 unless defined $DEBUG;
26 %EXPORT_TAGS = (write => [qw(write_log_records),
28 read => [qw(read_log_records),
30 misc => [qw(escape_log),
34 Exporter::export_ok_tags(qw(write read misc));
35 $EXPORT_TAGS{all} = [@EXPORT_OK];
39 use Debbugs::Common qw(getbuglocation getbugcomponent);
40 use Params::Validate qw(:types validate_with);
44 Debbugs::Log - an interface to debbugs .log files
48 The Debbugs::Log module provides a convenient way for scripts to read and
49 write the .log files used by debbugs to store the complete textual records
50 of all bug transactions.
52 =head2 The .log File Format
54 .log files consist of a sequence of records, of one of the following four
55 types. ^A, ^B, etc. represent those control characters.
65 C<[mail]> must start with /^Received: \(at \S+\) by \S+;/, and is copied to
70 Auto-forwarded messages are recorded like this:
76 C<[mail]> must contain /^X-Debian-Bugs(-\w+)?: This is an autoforward from
77 \S+/. The first line matching that is removed; all lines in the message body
78 that begin with 'X' will be copied to the output, minus the 'X'.
80 Nothing in debbugs actually generates this record type any more, but it may
81 still be in old .logs at some sites.
86 [recip]^D[recip]^D[...] OR -t
91 Each [recip] is output after "Message sent"; C<-t> represents the same
92 sendmail option, indicating that the recipients are taken from the headers
93 of the message itself.
101 [html] is copied unescaped to the output. The record immediately following
102 this one is considered "boring" and only shown in certain output modes.
104 (This is a design flaw in the log format, since it makes it difficult to
105 change the HTML presentation later, or to present the data in an entirely
110 No other types of records are permitted, and the file must end with a ^C
121 7 => 'incoming-recv',
124 =head2 Perl Record Representation
126 Each record is a hash. The C<type> field is C<incoming-recv>, C<autocheck>,
127 C<recips>, or C<html> as above; C<text> contains text from C<[mail]> or
128 C<[html]> as above; C<recips> is a reference to an array of recipients
129 (strings), or undef for C<-t>.
137 Creates a new log reader from a .log filehandle, a bug number, or the name of a log file.
139 my $log = Debbugs::Log->new($logfh);
140 my $log = Debbugs::Log->new(bug_num => $nnn);
141 my $log = Debbugs::Log->new(logfh => $logfh);
147 =item bug_num -- bug number
149 =item logfh -- log filehandle
151 =item log_name -- name of log
155 Exactly one of the above options must be passed, and it must be defined.
164 ($param{logfh}) = @_;
167 %param = validate_with(params => @_,
168 spec => {bug_num => {type => SCALAR,
171 logfh => {type => SCALAR,
174 log_name => {type => SCALAR,
180 if (grep {exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name) ne 3) {
181 croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined";
184 my $class = ref($this) || $this;
188 if (exists $param{logfh}) {
189 $self->{logfh} = $param{logfh}
191 elsif (exists $param{log_name}) {
192 $self->{logfh} = IO::File->new($param{log_name},'r') or
193 die "Unable to open bug log $param{log_name} for reading: $!";
195 elsif (exists $param{bug_num}) {
196 my $location = getbuglocation($bug,'log');
197 my $bug_log = getbugcomponent($bug,'log',$location);
198 $self->{logfh} = IO::File->new($bug_log, 'r') or
199 die "Unable to open bug log $bug_log for reading: $!";
202 $self->{state} = 'kill-init';
203 $self->{linenum} = 0;
209 Reads and returns a single record from a log reader object. At end of file,
210 returns undef. Throws exceptions using die(), so you may want to wrap this
218 my $logfh = $this->{logfh};
220 # This comes from bugreport.cgi, but is much simpler since it doesn't
221 # worry about the details of output.
225 while (defined (my $line = <$logfh>)) {
228 if (length($line) == 1 and exists $states{ord($line)}) {
230 my $newstate = $states{ord($line)};
232 # disallowed transitions
233 $_ = "$this->{state} $newstate";
234 unless (/^(go|go-nox|html) kill-end$/ or
235 /^(kill-init|kill-end) (incoming-recv|autocheck|recips|html)$/ or
237 die "transition from $this->{state} to $newstate at $this->{linenum} disallowed";
240 $this->{state} = $newstate;
242 if ($this->{state} =~ /^(autocheck|recips|html|incoming-recv)$/) {
243 $record->{type} = $this->{state};
244 } elsif ($this->{state} eq 'kill-end') {
252 if ($this->{state} eq 'incoming-recv') {
254 unless (/^Received: \(at \S+\) by \S+;/) {
255 die "bad line '$pl' in state incoming-recv";
257 $this->{state} = 'go';
258 $record->{text} .= "$_\n";
259 } elsif ($this->{state} eq 'html') {
260 $record->{text} .= "$_\n";
261 } elsif ($this->{state} eq 'go') {
263 $record->{text} .= "$_\n";
264 } elsif ($this->{state} eq 'go-nox') {
265 $record->{text} .= "$_\n";
266 } elsif ($this->{state} eq 'recips') {
268 undef $record->{recips};
270 # preserve trailing null fields, e.g. #2298
271 $record->{recips} = [split /\04/, $_, -1];
273 $this->{state} = 'kill-body';
274 } elsif ($this->{state} eq 'autocheck') {
275 $record->{text} .= "$_\n";
276 next if !/^X-Debian-Bugs(-\w+)?: This is an autoforward from (\S+)/;
277 $this->{state} = 'autowait';
278 } elsif ($this->{state} eq 'autowait') {
279 $record->{text} .= "$_\n";
281 $this->{state} = 'go-nox';
283 die "state $this->{state} at line $this->{linenum} ('$_')";
286 die "state $this->{state} at end" unless $this->{state} eq 'kill-end';
295 =item read_log_records
297 Takes a .log filehandle as input, and returns an array of all records in
298 that file. Throws exceptions using die(), so you may want to wrap this in an
303 sub read_log_records (*)
308 my $reader = Debbugs::Log->new($logfh);
309 while (defined(my $record = $reader->read_record())) {
310 push @records, $record;
315 =item write_log_records
317 Takes a filehandle and a list of records as input, and prints the .log
318 format representation of those records to that filehandle.
322 sub write_log_records (*@)
327 for my $record (@records) {
328 my $type = $record->{type};
329 my ($text) = escape_log($record->{text});
330 die "type '$type' with no text field" unless defined $text;
331 if ($type eq 'autocheck') {
332 print $logfh "\01\n$text\03\n";
333 } elsif ($type eq 'recips') {
334 print $logfh "\02\n";
335 my $recips = $record->{recips};
336 if (defined $recips) {
337 die "recips not undef or array"
338 unless ref($recips) eq 'ARRAY';
339 print $logfh join("\04", @$recips) . "\n";
343 #$text =~ s/^([\01-\07\030])/\030$1/gm;
344 print $logfh "\05\n$text\03\n";
345 } elsif ($type eq 'html') {
346 print $logfh "\06\n$text\03\n";
347 } elsif ($type eq 'incoming-recv') {
348 #$text =~ s/^([\01-\07\030])/\030$1/gm;
349 print $logfh "\07\n$text\03\n";
351 die "unknown type '$type'";
360 print {$log} escape_log(@log)
362 Applies the log escape regex to each of the passed log strings and returns the escaped strings.
368 return map { s/^([\01-\07\030])/\030$1/gm; $_ } @log;
376 This module does none of the formatting that bugreport.cgi et al do. It's
377 simply a means for extracting and rewriting raw records.