1 # This module is part of debbugs, and is released
2 # under the terms of the GPL version 2, or any later
3 # version at your option.
4 # See the file README and COPYING for more information.
6 # [Other people have contributed to this file; their copyrights should
8 # Copyright 2004 by Collin Watson <cjwatson@debian.org>
9 # Copyright 2007 by Don Armstrong <don@donarmstrong.com>
18 use vars qw($VERSION $DEBUG @EXPORT @EXPORT_OK %EXPORT_TAGS);
19 use base qw(Exporter);
23 $DEBUG = 0 unless defined $DEBUG;
26 %EXPORT_TAGS = (write => [qw(write_log_records),
28 read => [qw(read_log_records),
30 misc => [qw(escape_log),
34 Exporter::export_ok_tags(qw(write read misc));
35 $EXPORT_TAGS{all} = [@EXPORT_OK];
40 use Debbugs::Common qw(getbuglocation getbugcomponent make_list);
41 use Params::Validate qw(:types validate_with);
42 use Encode qw(encode is_utf8);
46 Debbugs::Log - an interface to debbugs .log files
50 The Debbugs::Log module provides a convenient way for scripts to read and
51 write the .log files used by debbugs to store the complete textual records
52 of all bug transactions.
54 Debbugs::Log does not decode utf8 into perl's internal encoding or
55 encode into utf8 from perl's internal encoding. For html records and
56 all recips, this should probably be done. For other records, this should
59 =head2 The .log File Format
61 .log files consist of a sequence of records, of one of the following four
62 types. ^A, ^B, etc. represent those control characters.
72 C<[mail]> must start with /^Received: \(at \S+\) by \S+;/, and is copied to
77 Auto-forwarded messages are recorded like this:
83 C<[mail]> must contain /^X-Debian-Bugs(-\w+)?: This is an autoforward from
84 \S+/. The first line matching that is removed; all lines in the message body
85 that begin with 'X' will be copied to the output, minus the 'X'.
87 Nothing in debbugs actually generates this record type any more, but it may
88 still be in old .logs at some sites.
93 [recip]^D[recip]^D[...] OR -t
98 Each [recip] is output after "Message sent"; C<-t> represents the same
99 sendmail option, indicating that the recipients are taken from the headers
100 of the message itself.
108 [html] is copied unescaped to the output. The record immediately following
109 this one is considered "boring" and only shown in certain output modes.
111 (This is a design flaw in the log format, since it makes it difficult to
112 change the HTML presentation later, or to present the data in an entirely
117 No other types of records are permitted, and the file must end with a ^C
128 7 => 'incoming-recv',
131 =head2 Perl Record Representation
133 Each record is a hash. The C<type> field is C<incoming-recv>, C<autocheck>,
134 C<recips>, or C<html> as above; C<text> contains text from C<[mail]> or
135 C<[html]> as above; C<recips> is a reference to an array of recipients
136 (strings), or undef for C<-t>.
144 Creates a new log reader based on a .log filehandle.
146 my $log = Debbugs::Log->new($logfh);
147 my $log = Debbugs::Log->new(bug_num => $nnn);
148 my $log = Debbugs::Log->new(logfh => $logfh);
154 =item bug_num -- bug number
156 =item logfh -- log filehandle
158 =item log_name -- name of log
162 One of the above options must be passed.
171 ($param{logfh}) = @_;
174 %param = validate_with(params => \@_,
175 spec => {bug_num => {type => SCALAR,
178 logfh => {type => HANDLE,
181 log_name => {type => SCALAR,
187 if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) ne 1) {
188 croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined";
191 my $class = ref($this) || $this;
195 if (exists $param{logfh}) {
196 $self->{logfh} = $param{logfh}
198 elsif (exists $param{log_name}) {
199 $self->{logfh} = IO::File->new($param{log_name},'r') or
200 die "Unable to open bug log $param{log_name} for reading: $!";
202 elsif (exists $param{bug_num}) {
203 my $location = getbuglocation($param{bug_num},'log');
204 my $bug_log = getbugcomponent($param{bug_num},'log',$location);
205 $self->{logfh} = IO::File->new($bug_log, 'r') or
206 die "Unable to open bug log $bug_log for reading: $!";
209 $self->{state} = 'kill-init';
210 $self->{linenum} = 0;
216 Reads and returns a single record from a log reader object. At end of file,
217 returns undef. Throws exceptions using die(), so you may want to wrap this
225 my $logfh = $this->{logfh};
227 # This comes from bugreport.cgi, but is much simpler since it doesn't
228 # worry about the details of output.
232 while (defined (my $line = <$logfh>)) {
235 if (length($line) == 1 and exists $states{ord($line)}) {
237 my $newstate = $states{ord($line)};
239 # disallowed transitions
240 $_ = "$this->{state} $newstate";
241 unless (/^(go|go-nox|html) kill-end$/ or
242 /^(kill-init|kill-end) (incoming-recv|autocheck|recips|html)$/ or
244 die "transition from $this->{state} to $newstate at $this->{linenum} disallowed";
247 $this->{state} = $newstate;
249 if ($this->{state} =~ /^(autocheck|recips|html|incoming-recv)$/) {
250 $record->{type} = $this->{state};
251 } elsif ($this->{state} eq 'kill-end') {
259 if ($this->{state} eq 'incoming-recv') {
261 unless (/^Received: \(at \S+\) by \S+;/) {
262 die "bad line '$pl' in state incoming-recv";
264 $this->{state} = 'go';
265 $record->{text} .= "$_\n";
266 } elsif ($this->{state} eq 'html') {
267 $record->{text} .= "$_\n";
268 } elsif ($this->{state} eq 'go') {
270 $record->{text} .= "$_\n";
271 } elsif ($this->{state} eq 'go-nox') {
272 $record->{text} .= "$_\n";
273 } elsif ($this->{state} eq 'recips') {
275 undef $record->{recips};
277 # preserve trailing null fields, e.g. #2298
278 $record->{recips} = [split /\04/, $_, -1];
280 $this->{state} = 'kill-body';
281 } elsif ($this->{state} eq 'autocheck') {
282 $record->{text} .= "$_\n";
283 next if !/^X-Debian-Bugs(-\w+)?: This is an autoforward from (\S+)/;
284 $this->{state} = 'autowait';
285 } elsif ($this->{state} eq 'autowait') {
286 $record->{text} .= "$_\n";
288 $this->{state} = 'go-nox';
290 die "state $this->{state} at line $this->{linenum} ('$_')";
293 die "state $this->{state} at end" unless $this->{state} eq 'kill-end';
302 =item read_log_records
304 Takes a .log filehandle as input, and returns an array of all records in
305 that file. Throws exceptions using die(), so you may want to wrap this in an
308 Uses exactly the same options as Debbugs::Log::new
316 ($param{logfh}) = @_;
319 %param = validate_with(params => \@_,
320 spec => {bug_num => {type => SCALAR,
323 logfh => {type => HANDLE,
326 log_name => {type => SCALAR,
332 if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) ne 1) {
333 croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined";
337 my $reader = Debbugs::Log->new(%param);
338 while (defined(my $record = $reader->read_record())) {
339 push @records, $record;
344 =item write_log_records
346 Takes a filehandle and a list of records as input, and prints the .log
347 format representation of those records to that filehandle.
353 sub write_log_records
355 my %param = validate_with(params => \@_,
356 spec => {bug_num => {type => SCALAR,
359 logfh => {type => HANDLE,
362 log_name => {type => SCALAR,
365 records => {type => HASHREF|ARRAYREF,
369 if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) ne 1) {
370 croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined";
373 if (exists $param{logfh}) {
374 $logfh = $param{logfh}
376 elsif (exists $param{log_name}) {
377 $logfh = IO::File->new(">>$param{log_name}") or
378 die "Unable to open bug log $param{log_name} for writing: $!";
380 elsif (exists $param{bug_num}) {
381 my $location = getbuglocation($param{bug_num},'log');
382 my $bug_log = getbugcomponent($param{bug_num},'log',$location);
383 $logfh = IO::File->new($bug_log, 'r') or
384 die "Unable to open bug log $bug_log for reading: $!";
386 my @records = make_list($param{records});
388 for my $record (@records) {
389 my $type = $record->{type};
390 croak "record type '$type' with no text field" unless defined $record->{text};
391 # I am not sure if we really want to croak here; but this is
392 # almost certainly a bug if is_utf8 is on.
393 # croak "probably wrong encoding" if is_utf8($record->{text});
394 my ($text) = escape_log($record->{text});
395 if ($type eq 'autocheck') {
396 print {$logfh} "\01\n$text\03\n" or
397 die "Unable to write to logfile: $!";
398 } elsif ($type eq 'recips') {
399 print {$logfh} "\02\n";
400 my $recips = $record->{recips};
401 if (defined $recips) {
402 croak "recips not undef or array"
403 unless ref($recips) eq 'ARRAY';
404 print {$logfh} join("\04", @$recips) . "\n" or
405 die "Unable to write to logfile: $!";
407 print {$logfh} "-t\n" or
408 die "Unable to write to logfile: $!";
410 #$text =~ s/^([\01-\07\030])/\030$1/gm;
411 print {$logfh} "\05\n$text\03\n" or
412 die "Unable to write to logfile: $!";
413 } elsif ($type eq 'html') {
414 print {$logfh} "\06\n$text\03\n" or
415 die "Unable to write to logfile: $!";
416 } elsif ($type eq 'incoming-recv') {
417 #$text =~ s/^([\01-\07\030])/\030$1/gm;
418 print {$logfh} "\07\n$text\03\n" or
419 die "Unable to write to logfile: $!";
421 croak "unknown record type type '$type'";
430 print {$log} escape_log(@log)
432 Applies the log escape regex to the passed logfile.
438 return map {s/^([\01-\07\030])/\030$1/gm; $_ } @log;
444 This module does none of the formatting that bugreport.cgi et al do. It's
445 simply a means for extracting and rewriting raw records.