6 use vars qw($VERSION @ISA @EXPORT);
12 @EXPORT = qw(read_log_records write_log_records);
17 Debbugs::Log - an interface to debbugs .log files
21 The Debbugs::Log module provides a convenient way for scripts to read and
22 write the .log files used by debbugs to store the complete textual records
23 of all bug transactions.
25 =head2 The .log File Format
27 .log files consist of a sequence of records, of one of the following four
28 types. ^A, ^B, etc. represent those control characters.
38 C<[mail]> must start with /^Received: \(at \S+\) by \S+;/, and is copied to
43 Auto-forwarded messages are recorded like this:
49 C<[mail]> must contain /^X-Debian-Bugs(-\w+)?: This is an autoforward from
50 \S+/. The first line matching that is removed; all lines in the message body
51 that begin with 'X' will be copied to the output, minus the 'X'.
53 Nothing in debbugs actually generates this record type any more, but it may
54 still be in old .logs at some sites.
59 [recip]^D[recip]^D[...] OR -t
64 Each [recip] is output after "Message sent"; C<-t> represents the same
65 sendmail option, indicating that the recipients are taken from the headers
66 of the message itself.
74 [html] is copied unescaped to the output. The record immediately following
75 this one is considered "boring" and only shown in certain output modes.
77 (This is a design flaw in the log format, since it makes it difficult to
78 change the HTML presentation later, or to present the data in an entirely
83 No other types of records are permitted, and the file must end with a ^C
97 =head2 Perl Record Representation
99 Each record is a hash. The C<type> field is C<incoming-recv>, C<autocheck>,
100 C<recips>, or C<html> as above; C<text> contains text from C<[mail]> or
101 C<[html]> as above; C<recips> is a reference to an array of recipients
102 (strings), or undef for C<-t>.
110 Creates a new log reader based on a .log filehandle.
117 my $class = ref($this) || $this;
120 $self->{logfh} = shift;
121 $self->{state} = 'kill-init';
122 $self->{linenum} = 0;
128 Reads and returns a single record from a log reader object. At end of file,
129 returns undef. Throws exceptions using die(), so you may want to wrap this
137 my $logfh = $this->{logfh};
139 # This comes from bugreport.cgi, but is much simpler since it doesn't
140 # worry about the details of output.
144 while (defined (my $line = <$logfh>)) {
147 if (length($line) == 1 and exists $states{ord($line)}) {
149 my $newstate = $states{ord($line)};
151 # disallowed transitions
152 $_ = "$this->{state} $newstate";
153 unless (/^(go|go-nox|html) kill-end$/ or
154 /^(kill-init|kill-end) (incoming-recv|autocheck|recips|html)$/ or
156 die "transition from $this->{state} to $newstate at $this->{linenum} disallowed";
159 $this->{state} = $newstate;
161 if ($this->{state} =~ /^(autocheck|recips|html|incoming-recv)$/) {
162 $record->{type} = $this->{state};
163 } elsif ($this->{state} eq 'kill-end') {
171 if ($this->{state} eq 'incoming-recv') {
173 unless (/^Received: \(at \S+\) by \S+;/) {
174 die "bad line '$pl' in state incoming-recv";
176 $this->{state} = 'go';
177 $record->{text} .= "$_\n";
178 } elsif ($this->{state} eq 'html') {
179 $record->{text} .= "$_\n";
180 } elsif ($this->{state} eq 'go') {
182 $record->{text} .= "$_\n";
183 } elsif ($this->{state} eq 'go-nox') {
184 $record->{text} .= "$_\n";
185 } elsif ($this->{state} eq 'recips') {
187 undef $record->{recips};
189 # preserve trailing null fields, e.g. #2298
190 $record->{recips} = [split /\04/, $_, -1];
192 $this->{state} = 'kill-body';
193 } elsif ($this->{state} eq 'autocheck') {
194 $record->{text} .= "$_\n";
195 next if !/^X-Debian-Bugs(-\w+)?: This is an autoforward from (\S+)/;
196 $this->{state} = 'autowait';
197 } elsif ($this->{state} eq 'autowait') {
198 $record->{text} .= "$_\n";
200 $this->{state} = 'go-nox';
202 die "state $this->{state} at line $this->{linenum} ('$_')";
205 die "state $this->{state} at end" unless $this->{state} eq 'kill-end';
214 =item read_log_records
216 Takes a .log filehandle as input, and returns an array of all records in
217 that file. Throws exceptions using die(), so you may want to wrap this in an
222 sub read_log_records (*)
227 my $reader = Debbugs::Log->new($logfh);
228 while (defined(my $record = $reader->read_record())) {
229 push @records, $record;
234 =item write_log_records
236 Takes a filehandle and a list of records as input, and prints the .log
237 format representation of those records to that filehandle.
241 sub write_log_records (*@)
246 for my $record (@records) {
247 my $type = $record->{type};
248 my $text = $record->{text};
249 die "type '$type' with no text field" unless defined $text;
250 if ($type eq 'autocheck') {
251 print $logfh "\01\n$text\03\n";
252 } elsif ($type eq 'recips') {
253 print $logfh "\02\n";
254 my $recips = $record->{recips};
255 if (defined $recips) {
256 die "recips not undef or array"
257 unless ref($recips) eq 'ARRAY';
258 print $logfh join("\04", @$recips) . "\n";
262 $text =~ s/^([\01-\07\030])/\030$1/gm;
263 print $logfh "\05\n$text\03\n";
264 } elsif ($type eq 'html') {
265 print $logfh "\06\n$text\03\n";
266 } elsif ($type eq 'incoming-recv') {
267 $text =~ s/^([\01-\07\030])/\030$1/gm;
268 print $logfh "\07\n$text\03\n";
270 die "unknown type '$type'";
281 This module does none of the formatting that bugreport.cgi et al do. It's
282 simply a means for extracting and rewriting raw records.