1 # This module is part of debbugs, and is released
2 # under the terms of the GPL version 2, or any later
3 # version at your option.
4 # See the file README and COPYING for more information.
6 # [Other people have contributed to this file; their copyrights should
8 # Copyright 2004 by Colin Watson <cjwatson@debian.org>
9 # Copyright 2007 by Don Armstrong <don@donarmstrong.com>
18 use vars qw($VERSION $DEBUG @EXPORT @EXPORT_OK %EXPORT_TAGS);
19 use Exporter qw(import);
23 $DEBUG = 0 unless defined $DEBUG;
26 %EXPORT_TAGS = (write => [qw(write_log_records),
28 read => [qw(read_log_records record_text record_regex),
30 misc => [qw(escape_log),
34 Exporter::export_ok_tags(qw(write read misc));
35 $EXPORT_TAGS{all} = [@EXPORT_OK];
40 use Debbugs::Common qw(getbuglocation getbugcomponent make_list);
41 use Params::Validate qw(:types validate_with);
42 use Encode qw(encode encode_utf8 is_utf8);
47 Debbugs::Log - an interface to debbugs .log files
51 The Debbugs::Log module provides a convenient way for scripts to read and
52 write the .log files used by debbugs to store the complete textual records
53 of all bug transactions.
55 Debbugs::Log does not decode utf8 into perl's internal encoding or
56 encode into utf8 from perl's internal encoding. For html records and
57 all recips, this should probably be done. For other records, this should
60 =head2 The .log File Format
62 .log files consist of a sequence of records, of one of the following four
63 types. ^A, ^B, etc. represent those control characters.
73 C<[mail]> must start with /^Received: \(at \S+\) by \S+;/, and is copied to
78 Auto-forwarded messages are recorded like this:
84 C<[mail]> must contain /^X-Debian-Bugs(-\w+)?: This is an autoforward from
85 \S+/. The first line matching that is removed; all lines in the message body
86 that begin with 'X' will be copied to the output, minus the 'X'.
88 Nothing in debbugs actually generates this record type any more, but it may
89 still be in old .logs at some sites.
94 [recip]^D[recip]^D[...] OR -t
99 Each [recip] is output after "Message sent"; C<-t> represents the same
100 sendmail option, indicating that the recipients are taken from the headers
101 of the message itself.
109 [html] is copied unescaped to the output. The record immediately following
110 this one is considered "boring" and only shown in certain output modes.
112 (This is a design flaw in the log format, since it makes it difficult to
113 change the HTML presentation later, or to present the data in an entirely
118 No other types of records are permitted, and the file must end with a ^C
129 7 => 'incoming-recv',
132 =head2 Perl Record Representation
134 Each record is a hash. The C<type> field is C<incoming-recv>, C<autocheck>,
135 C<recips>, or C<html> as above; C<text> contains text from C<[mail]> or
136 C<[html]> as above; C<recips> is a reference to an array of recipients
137 (strings), or undef for C<-t>.
145 Creates a new log reader based on a .log filehandle, a bug number, or a log file name.
147 my $log = Debbugs::Log->new($logfh);
148 my $log = Debbugs::Log->new(bug_num => $nnn);
149 my $log = Debbugs::Log->new(logfh => $logfh);
155 =item bug_num -- bug number
157 =item logfh -- log filehandle
159 =item log_name -- name of log
163 Exactly one of the above options must be passed, and it must be defined.
172 ($param{logfh}) = @_;
173 $param{inner_file} = 0;
176 %param = validate_with(params => \@_,
177 spec => {bug_num => {type => SCALAR,
180 logfh => {type => HANDLE,
183 log_name => {type => SCALAR,
186 inner_file => {type => BOOLEAN,
192 if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) ne 1) {
193 croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined";
196 my $class = ref($this) || $this;
200 if (exists $param{logfh}) {
201 $self->{logfh} = $param{logfh}
203 elsif (exists $param{log_name}) {
204 $self->{logfh} = IO::File->new($param{log_name},'r') or
205 die "Unable to open bug log $param{log_name} for reading: $!";
207 elsif (exists $param{bug_num}) {
208 my $location = getbuglocation($param{bug_num},'log');
209 my $bug_log = getbugcomponent($param{bug_num},'log',$location);
210 $self->{logfh} = IO::File->new($bug_log, 'r') or
211 die "Unable to open bug log $bug_log for reading: $!";
214 $self->{state} = 'kill-init';
215 $self->{linenum} = 0;
216 $self->{inner_file} = $param{inner_file};
222 Reads and returns a single record from a log reader object. At end of file,
223 returns undef. Throws exceptions using die(), so you may want to wrap this
231 my $logfh = $this->{logfh};
233 # This comes from bugreport.cgi, but is much simpler since it doesn't
234 # worry about the details of output.
238 while (defined (my $line = <$logfh>)) {
239 $record->{start} = $logfh->tell() if not defined $record->{start};
242 if (length($line) == 1 and exists $states{ord($line)}) {
244 my $newstate = $states{ord($line)};
246 # disallowed transitions
247 $_ = "$this->{state} $newstate";
248 unless (/^(go|go-nox|html) kill-end$/ or
249 /^(kill-init|kill-end) (incoming-recv|autocheck|recips|html)$/ or
251 die "transition from $this->{state} to $newstate at $this->{linenum} disallowed";
254 $this->{state} = $newstate;
255 if ($this->{state} =~ /^(autocheck|recips|html|incoming-recv)$/) {
256 $record->{type} = $this->{state};
257 $record->{start} = $logfh->tell;
258 $record->{stop} = $logfh->tell;
259 $record->{inner_file} = $this->{inner_file};
260 } elsif ($this->{state} eq 'kill-end') {
261 if ($this->{inner_file}) {
262 $record->{fh} = IO::InnerFile->new($logfh,$record->{start},$record->{stop} - $record->{start})
269 $record->{stop} = $logfh->tell;
271 if ($this->{state} eq 'incoming-recv') {
273 unless (/^Received: \(at \S+\) by \S+;/) {
274 die "bad line '$pl' in state incoming-recv";
276 $this->{state} = 'go';
277 $record->{text} .= "$_\n" unless $this->{inner_file};
278 } elsif ($this->{state} eq 'html') {
279 $record->{text} .= "$_\n" unless $this->{inner_file};
280 } elsif ($this->{state} eq 'go') {
282 $record->{text} .= "$_\n" unless $this->{inner_file};
283 } elsif ($this->{state} eq 'go-nox') {
284 $record->{text} .= "$_\n" unless $this->{inner_file};
285 } elsif ($this->{state} eq 'recips') {
287 undef $record->{recips};
289 # preserve trailing null fields, e.g. #2298
290 $record->{recips} = [split /\04/, $_, -1];
292 $this->{state} = 'kill-body';
293 $record->{start} = $logfh->tell+2;
294 $record->{stop} = $logfh->tell+2;
295 $record->{inner_file} = $this->{inner_file};
296 } elsif ($this->{state} eq 'autocheck') {
297 $record->{text} .= "$_\n" unless $this->{inner_file};
298 next if !/^X-Debian-Bugs(-\w+)?: This is an autoforward from (\S+)/;
299 $this->{state} = 'autowait';
300 } elsif ($this->{state} eq 'autowait') {
301 $record->{text} .= "$_\n" unless $this->{inner_file};
303 $this->{state} = 'go-nox';
305 die "state $this->{state} at line $this->{linenum} ('$_')";
308 die "state $this->{state} at end" unless $this->{state} eq 'kill-end';
317 =item read_log_records
319 Takes a .log filehandle as input, and returns an array of all records in
320 that file. Throws exceptions using die(), so you may want to wrap this in an
323 Uses exactly the same options as Debbugs::Log::new
331 ($param{logfh}) = @_;
334 %param = validate_with(params => \@_,
335 spec => {bug_num => {type => SCALAR,
338 logfh => {type => HANDLE,
341 log_name => {type => SCALAR,
344 inner_file => {type => BOOLEAN,
350 if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) ne 1) {
351 croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined";
355 my $reader = Debbugs::Log->new(%param);
356 while (defined(my $record = $reader->read_record())) {
357 push @records, $record;
362 =item write_log_records
364 Takes exactly one of C<logfh>, C<log_name>, or C<bug_num> plus C<records> as
365 named parameters, and prints the .log format representation of those records to that log.
371 sub write_log_records
373 my %param = validate_with(params => \@_,
374 spec => {bug_num => {type => SCALAR,
377 logfh => {type => HANDLE,
380 log_name => {type => SCALAR,
383 records => {type => HASHREF|ARRAYREF,
387 if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) ne 1) {
388 croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined";
391 if (exists $param{logfh}) {
392 $logfh = $param{logfh}
394 elsif (exists $param{log_name}) {
395 $logfh = IO::File->new(">>$param{log_name}") or
396 die "Unable to open bug log $param{log_name} for writing: $!";
398 elsif (exists $param{bug_num}) {
399 my $location = getbuglocation($param{bug_num},'log');
400 my $bug_log = getbugcomponent($param{bug_num},'log',$location);
401 $logfh = IO::File->new($bug_log, 'r') or
402 die "Unable to open bug log $bug_log for reading: $!";
404 my @records = make_list($param{records});
406 for my $record (@records) {
407 my $type = $record->{type};
408 croak "record type '$type' with no text field" unless defined $record->{text};
409 # I am not sure if we really want to croak here; but this is
410 # almost certainly a bug if is_utf8 is on.
411 my $text = $record->{text};
412 if (is_utf8($text)) {
413 carp('Record text was in the wrong encoding (perl internal instead of utf8 octets)');
414 $text = encode_utf8($text)
416 ($text) = escape_log($text);
417 if ($type eq 'autocheck') {
418 print {$logfh} "\01\n$text\03\n" or
419 die "Unable to write to logfile: $!";
420 } elsif ($type eq 'recips') {
421 print {$logfh} "\02\n";
422 my $recips = $record->{recips};
423 if (defined $recips) {
424 croak "recips not undef or array"
425 unless ref($recips) eq 'ARRAY';
426 my $wrong_encoding = 0;
428 map { if (is_utf8($_)) {
434 carp('Recipients was in the wrong encoding (perl internal instead of utf8 octets') if $wrong_encoding;
435 print {$logfh} join("\04", @$recips) . "\n" or
436 die "Unable to write to logfile: $!";
438 print {$logfh} "-t\n" or
439 die "Unable to write to logfile: $!";
441 #$text =~ s/^([\01-\07\030])/\030$1/gm;
442 print {$logfh} "\05\n$text\03\n" or
443 die "Unable to write to logfile: $!";
444 } elsif ($type eq 'html') {
445 print {$logfh} "\06\n$text\03\n" or
446 die "Unable to write to logfile: $!";
447 } elsif ($type eq 'incoming-recv') {
448 #$text =~ s/^([\01-\07\030])/\030$1/gm;
449 print {$logfh} "\07\n$text\03\n" or
450 die "Unable to write to logfile: $!";
452 croak "unknown record type type '$type'";
461 print {$log} escape_log(@log)
463 Applies the log escape regex to each of the passed strings and returns them.
469 return map {s/^([\01-\07\030])/\030$1/gm; $_ } @log;
475 if ($record->{inner_file}) {
478 my $t = $record->{fh};
480 $record->{fh}->seek(0,0);
483 return $record->{text};
488 my ($record,$regex) = @_;
489 if ($record->{inner_file}) {
491 my $fh = $record->{fh};
493 if (@result = $_ =~ m/$regex/) {
494 $record->{fh}->seek(0,0);
498 $record->{fh}->seek(0,0);
501 my @result = $record->{text} =~ m/$regex/;
503 return $record->{text};
510 This module does none of the formatting that bugreport.cgi et al do. It's
511 simply a means for extracting and rewriting raw records.