1 # This module is part of debbugs, and is released
2 # under the terms of the GPL version 2, or any later
3 # version at your option.
4 # See the file README and COPYING for more information.
6 # [Other people have contributed to this file; their copyrights should
8 # Copyright 2004 by Colin Watson <cjwatson@debian.org>
9 # Copyright 2007 by Don Armstrong <don@donarmstrong.com>
18 use vars qw($VERSION $DEBUG @EXPORT @EXPORT_OK %EXPORT_TAGS);
19 use base qw(Exporter);
23 $DEBUG = 0 unless defined $DEBUG;
26 %EXPORT_TAGS = (write => [qw(write_log_records),
28 read => [qw(read_log_records),
30 misc => [qw(escape_log),
34 Exporter::export_ok_tags(qw(write read misc));
35 $EXPORT_TAGS{all} = [@EXPORT_OK];
40 use Debbugs::Common qw(getbuglocation getbugcomponent);
41 use Params::Validate qw(:types validate_with);
45 Debbugs::Log - an interface to debbugs .log files
49 The Debbugs::Log module provides a convenient way for scripts to read and
50 write the .log files used by debbugs to store the complete textual records
51 of all bug transactions.
53 =head2 The .log File Format
55 .log files consist of a sequence of records, of one of the following four
56 types. ^A, ^B, etc. represent those control characters.
66 C<[mail]> must start with /^Received: \(at \S+\) by \S+;/, and is copied to
71 Auto-forwarded messages are recorded like this:
77 C<[mail]> must contain /^X-Debian-Bugs(-\w+)?: This is an autoforward from
78 \S+/. The first line matching that is removed; all lines in the message body
79 that begin with 'X' will be copied to the output, minus the 'X'.
81 Nothing in debbugs actually generates this record type any more, but it may
82 still be in old .logs at some sites.
87 [recip]^D[recip]^D[...] OR -t
92 Each [recip] is output after "Message sent"; C<-t> represents the same
93 sendmail option, indicating that the recipients are taken from the headers
94 of the message itself.
102 [html] is copied unescaped to the output. The record immediately following
103 this one is considered "boring" and only shown in certain output modes.
105 (This is a design flaw in the log format, since it makes it difficult to
106 change the HTML presentation later, or to present the data in an entirely
111 No other types of records are permitted, and the file must end with a ^C
122 7 => 'incoming-recv',
125 =head2 Perl Record Representation
127 Each record is a hash. The C<type> field is C<incoming-recv>, C<autocheck>,
128 C<recips>, or C<html> as above; C<text> contains text from C<[mail]> or
129 C<[html]> as above; C<recips> is a reference to an array of recipients
130 (strings), or undef for C<-t>.
138 Creates a new log reader based on a .log filehandle, a bug number, or the name of a log file.
140 my $log = Debbugs::Log->new($logfh);
141 my $log = Debbugs::Log->new(bug_num => $nnn);
142 my $log = Debbugs::Log->new(logfh => $logfh);
148 =item bug_num -- bug number
150 =item logfh -- log filehandle
152 =item log_name -- name of log
156 Exactly one of the above options must be passed, and its value must be defined.
165 ($param{logfh}) = @_;
168 %param = validate_with(params => \@_,
169 spec => {bug_num => {type => SCALAR,
172 logfh => {type => SCALAR,
175 log_name => {type => SCALAR,
181 if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) ne 1) {
182 croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined";
185 my $class = ref($this) || $this;
189 if (exists $param{logfh}) {
190 $self->{logfh} = $param{logfh}
192 elsif (exists $param{log_name}) {
193 $self->{logfh} = IO::File->new($param{log_name},'r') or
194 die "Unable to open bug log $param{log_name} for reading: $!";
196 elsif (exists $param{bug_num}) {
197 my $location = getbuglocation($param{bug_num},'log');
198 my $bug_log = getbugcomponent($param{bug_num},'log',$location);
199 $self->{logfh} = IO::File->new($bug_log, 'r') or
200 die "Unable to open bug log $bug_log for reading: $!";
203 $self->{state} = 'kill-init';
204 $self->{linenum} = 0;
210 Reads and returns a single record from a log reader object. At end of file,
211 returns undef. Throws exceptions using die(), so you may want to wrap this
219 my $logfh = $this->{logfh};
221 # This comes from bugreport.cgi, but is much simpler since it doesn't
222 # worry about the details of output.
226 while (defined (my $line = <$logfh>)) {
229 if (length($line) == 1 and exists $states{ord($line)}) {
231 my $newstate = $states{ord($line)};
233 # die on disallowed transitions (the patterns below list the allowed ones)
234 $_ = "$this->{state} $newstate";
235 unless (/^(go|go-nox|html) kill-end$/ or
236 /^(kill-init|kill-end) (incoming-recv|autocheck|recips|html)$/ or
238 die "transition from $this->{state} to $newstate at $this->{linenum} disallowed";
241 $this->{state} = $newstate;
243 if ($this->{state} =~ /^(autocheck|recips|html|incoming-recv)$/) {
244 $record->{type} = $this->{state};
245 } elsif ($this->{state} eq 'kill-end') {
253 if ($this->{state} eq 'incoming-recv') {
255 unless (/^Received: \(at \S+\) by \S+;/) {
256 die "bad line '$pl' in state incoming-recv";
258 $this->{state} = 'go';
259 $record->{text} .= "$_\n";
260 } elsif ($this->{state} eq 'html') {
261 $record->{text} .= "$_\n";
262 } elsif ($this->{state} eq 'go') {
264 $record->{text} .= "$_\n";
265 } elsif ($this->{state} eq 'go-nox') {
266 $record->{text} .= "$_\n";
267 } elsif ($this->{state} eq 'recips') {
269 undef $record->{recips};
271 # preserve trailing null fields, e.g. #2298
272 $record->{recips} = [split /\04/, $_, -1];
274 $this->{state} = 'kill-body';
275 } elsif ($this->{state} eq 'autocheck') {
276 $record->{text} .= "$_\n";
277 next if !/^X-Debian-Bugs(-\w+)?: This is an autoforward from (\S+)/;
278 $this->{state} = 'autowait';
279 } elsif ($this->{state} eq 'autowait') {
280 $record->{text} .= "$_\n";
282 $this->{state} = 'go-nox';
284 die "state $this->{state} at line $this->{linenum} ('$_')";
287 die "state $this->{state} at end" unless $this->{state} eq 'kill-end';
296 =item read_log_records
298 Takes a .log filehandle as input, and returns an array of all records in
299 that file. Throws exceptions using die(), so you may want to wrap this in an
304 sub read_log_records (*)
309 my $reader = Debbugs::Log->new($logfh);
310 while (defined(my $record = $reader->read_record())) {
311 push @records, $record;
316 =item write_log_records
318 Takes a filehandle and a list of records as input, and prints the .log
319 format representation of those records to that filehandle.
323 sub write_log_records (*@)
328 for my $record (@records) {
329 my $type = $record->{type};
330 my ($text) = escape_log($record->{text});
331 die "type '$type' with no text field" unless defined $text;
332 if ($type eq 'autocheck') {
333 print $logfh "\01\n$text\03\n";
334 } elsif ($type eq 'recips') {
335 print $logfh "\02\n";
336 my $recips = $record->{recips};
337 if (defined $recips) {
338 die "recips not undef or array"
339 unless ref($recips) eq 'ARRAY';
340 print $logfh join("\04", @$recips) . "\n";
344 #$text =~ s/^([\01-\07\030])/\030$1/gm;
345 print $logfh "\05\n$text\03\n";
346 } elsif ($type eq 'html') {
347 print $logfh "\06\n$text\03\n";
348 } elsif ($type eq 'incoming-recv') {
349 #$text =~ s/^([\01-\07\030])/\030$1/gm;
350 print $logfh "\07\n$text\03\n";
352 die "unknown type '$type'";
361 print {$log} escape_log(@log)
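363 Applies the log escape regex to each of the passed strings, prefixing any line that starts with one of the control characters ^A-^G or ^X with ^X, and returns the resulting strings.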
369 return map { s/^([\01-\07\030])/\030$1/gm; $_ } @log;
377 This module does none of the formatting that bugreport.cgi et al do. It's
378 simply a means for extracting and rewriting raw records.