1 # This module is part of debbugs, and is released
2 # under the terms of the GPL version 2, or any later
3 # version at your option.
4 # See the file README and COPYING for more information.
6 # [Other people have contributed to this file; their copyrights should
8 # Copyright 2004 by Colin Watson <cjwatson@debian.org>
9 # Copyright 2007 by Don Armstrong <don@donarmstrong.com>
18 use vars qw($VERSION $DEBUG @EXPORT @EXPORT_OK %EXPORT_TAGS);
19 use base qw(Exporter);
23 $DEBUG = 0 unless defined $DEBUG;
26 %EXPORT_TAGS = (write => [qw(write_log_records),
28 read => [qw(read_log_records),
30 misc => [qw(escape_log),
34 Exporter::export_ok_tags(qw(write read misc));
35 $EXPORT_TAGS{all} = [@EXPORT_OK];
40 Debbugs::Log - an interface to debbugs .log files
44 The Debbugs::Log module provides a convenient way for scripts to read and
45 write the .log files used by debbugs to store the complete textual records
46 of all bug transactions.
48 =head2 The .log File Format
50 .log files consist of a sequence of records, of one of the following four
51 types. ^A, ^B, etc. represent those control characters.
61 C<[mail]> must start with /^Received: \(at \S+\) by \S+;/, and is copied to
66 Auto-forwarded messages are recorded like this:
72 C<[mail]> must contain /^X-Debian-Bugs(-\w+)?: This is an autoforward from
73 \S+/. The first line matching that is removed; all lines in the message body
74 that begin with 'X' will be copied to the output, minus the 'X'.
76 Nothing in debbugs actually generates this record type any more, but it may
77 still be in old .logs at some sites.
82 [recip]^D[recip]^D[...] OR -t
87 Each [recip] is output after "Message sent"; C<-t> represents the same
88 sendmail option, indicating that the recipients are taken from the headers
89 of the message itself.
97 [html] is copied unescaped to the output. The record immediately following
98 this one is considered "boring" and only shown in certain output modes.
100 (This is a design flaw in the log format, since it makes it difficult to
101 change the HTML presentation later, or to present the data in an entirely
106 No other types of records are permitted, and the file must end with a ^C
117 7 => 'incoming-recv',
120 =head2 Perl Record Representation
122 Each record is a hash. The C<type> field is C<incoming-recv>, C<autocheck>,
123 C<recips>, or C<html> as above; C<text> contains text from C<[mail]> or
124 C<[html]> as above; C<recips> is a reference to an array of recipients
125 (strings), or undef for C<-t>.
133 Creates a new log reader based on a .log filehandle.
140 my $class = ref($this) || $this;
143 $self->{logfh} = shift;
144 $self->{state} = 'kill-init';
145 $self->{linenum} = 0;
151 Reads and returns a single record from a log reader object. At end of file,
152 returns undef. Throws exceptions using die(), so you may want to wrap this
160 my $logfh = $this->{logfh};
162 # This comes from bugreport.cgi, but is much simpler since it doesn't
163 # worry about the details of output.
167 while (defined (my $line = <$logfh>)) {
170 if (length($line) == 1 and exists $states{ord($line)}) {
172 my $newstate = $states{ord($line)};
174 # reject disallowed transitions; only the patterns below are permitted
175 $_ = "$this->{state} $newstate";
176 unless (/^(go|go-nox|html) kill-end$/ or
177 /^(kill-init|kill-end) (incoming-recv|autocheck|recips|html)$/ or
179 die "transition from $this->{state} to $newstate at $this->{linenum} disallowed";
182 $this->{state} = $newstate;
184 if ($this->{state} =~ /^(autocheck|recips|html|incoming-recv)$/) {
185 $record->{type} = $this->{state};
186 } elsif ($this->{state} eq 'kill-end') {
194 if ($this->{state} eq 'incoming-recv') {
196 unless (/^Received: \(at \S+\) by \S+;/) {
197 die "bad line '$pl' in state incoming-recv";
199 $this->{state} = 'go';
200 $record->{text} .= "$_\n";
201 } elsif ($this->{state} eq 'html') {
202 $record->{text} .= "$_\n";
203 } elsif ($this->{state} eq 'go') {
205 $record->{text} .= "$_\n";
206 } elsif ($this->{state} eq 'go-nox') {
207 $record->{text} .= "$_\n";
208 } elsif ($this->{state} eq 'recips') {
210 undef $record->{recips};
212 # preserve trailing null fields, e.g. #2298
213 $record->{recips} = [split /\04/, $_, -1];
215 $this->{state} = 'kill-body';
216 } elsif ($this->{state} eq 'autocheck') {
217 $record->{text} .= "$_\n";
218 next if !/^X-Debian-Bugs(-\w+)?: This is an autoforward from (\S+)/;
219 $this->{state} = 'autowait';
220 } elsif ($this->{state} eq 'autowait') {
221 $record->{text} .= "$_\n";
223 $this->{state} = 'go-nox';
225 die "state $this->{state} at line $this->{linenum} ('$_')";
228 die "state $this->{state} at end" unless $this->{state} eq 'kill-end';
237 =item read_log_records
239 Takes a .log filehandle as input, and returns an array of all records in
240 that file. Throws exceptions using die(), so you may want to wrap this in an
245 sub read_log_records (*)
250 my $reader = Debbugs::Log->new($logfh);
251 while (defined(my $record = $reader->read_record())) {
252 push @records, $record;
257 =item write_log_records
259 Takes a filehandle and a list of records as input, and prints the .log
260 format representation of those records to that filehandle.
264 sub write_log_records (*@)
269 for my $record (@records) {
270 my $type = $record->{type};
271 my ($text) = escape_log($record->{text});
272 die "type '$type' with no text field" unless defined $text;
273 if ($type eq 'autocheck') {
274 print $logfh "\01\n$text\03\n";
275 } elsif ($type eq 'recips') {
276 print $logfh "\02\n";
277 my $recips = $record->{recips};
278 if (defined $recips) {
279 die "recips not undef or array"
280 unless ref($recips) eq 'ARRAY';
281 print $logfh join("\04", @$recips) . "\n";
285 #$text =~ s/^([\01-\07\030])/\030$1/gm;
286 print $logfh "\05\n$text\03\n";
287 } elsif ($type eq 'html') {
288 print $logfh "\06\n$text\03\n";
289 } elsif ($type eq 'incoming-recv') {
290 #$text =~ s/^([\01-\07\030])/\030$1/gm;
291 print $logfh "\07\n$text\03\n";
293 die "unknown type '$type'";
302 print {$log} escape_log(@log)
304 Applies the log escape regex to the passed log text and returns the escaped result.
310 return map { s/^([\01-\07\030])/\030$1/gm; $_ } @log;
318 This module does none of the formatting that bugreport.cgi et al do. It's
319 simply a means for extracting and rewriting raw records.