]> git.donarmstrong.com Git - debbugs.git/blobdiff - Debbugs/Log.pm
Add IO::InnerFile support to Debbugs::Log and use it
[debbugs.git] / Debbugs / Log.pm
index c4d741e6ab46f322a9519075c2d60cbba416de64..dce86d5c05a36d708ed22efe3c95cf4b062c7783 100644 (file)
@@ -25,7 +25,7 @@ BEGIN {
     @EXPORT = ();
     %EXPORT_TAGS = (write => [qw(write_log_records),
                             ],
     @EXPORT = ();
     %EXPORT_TAGS = (write => [qw(write_log_records),
                             ],
-                   read  => [qw(read_log_records),
+                   read  => [qw(read_log_records record_text record_regex),
                             ],
                    misc  => [qw(escape_log),
                             ],
                             ],
                    misc  => [qw(escape_log),
                             ],
@@ -37,8 +37,10 @@ BEGIN {
 
 use Carp;
 
 
 use Carp;
 
-use Debbugs::Common qw(getbuglocation getbugcomponent);
+use Debbugs::Common qw(getbuglocation getbugcomponent make_list);
 use Params::Validate qw(:types validate_with);
 use Params::Validate qw(:types validate_with);
+use Encode qw(encode encode_utf8 is_utf8);
+use IO::InnerFile;
 
 =head1 NAME
 
 
 =head1 NAME
 
@@ -50,6 +52,11 @@ The Debbugs::Log module provides a convenient way for scripts to read and
 write the .log files used by debbugs to store the complete textual records
 of all bug transactions.
 
 write the .log files used by debbugs to store the complete textual records
 of all bug transactions.
 
+Debbugs::Log does not decode utf8 into perl's internal encoding or
+encode into utf8 from perl's internal encoding. For html records and
+all recips, this should probably be done. For other records, this should
+not be needed.
+
 =head2 The .log File Format
 
 .log files consist of a sequence of records, of one of the following four
 =head2 The .log File Format
 
 .log files consist of a sequence of records, of one of the following four
@@ -163,18 +170,22 @@ sub new
     my %param;
     if (@_ == 1) {
         ($param{logfh}) = @_;
     my %param;
     if (@_ == 1) {
         ($param{logfh}) = @_;
+        $param{inner_file} = 0;
     }
     else {
         %param = validate_with(params => \@_,
                                spec   => {bug_num => {type => SCALAR,
                                                       optional => 1,
                                                      },
     }
     else {
         %param = validate_with(params => \@_,
                                spec   => {bug_num => {type => SCALAR,
                                                       optional => 1,
                                                      },
-                                          logfh   => {type => SCALAR,
+                                          logfh   => {type => HANDLE,
                                                       optional => 1,
                                                      },
                                           log_name => {type => SCALAR,
                                                        optional => 1,
                                                       optional => 1,
                                                      },
                                           log_name => {type => SCALAR,
                                                        optional => 1,
-                                                      },
+                                   },
+                           inner_file => {type => BOOLEAN,
+                                          default => 0,
+                                         },
                                          }
                               );
     }
                                          }
                               );
     }
@@ -202,6 +213,7 @@ sub new
 
     $self->{state} = 'kill-init';
     $self->{linenum} = 0;
 
     $self->{state} = 'kill-init';
     $self->{linenum} = 0;
+    $self->{inner_file} = $param{inner_file};
     return $self;
 }
 
     return $self;
 }
 
@@ -239,16 +251,21 @@ sub read_record
            }
 
            $this->{state} = $newstate;
            }
 
            $this->{state} = $newstate;
-
            if ($this->{state} =~ /^(autocheck|recips|html|incoming-recv)$/) {
            if ($this->{state} =~ /^(autocheck|recips|html|incoming-recv)$/) {
-               $record->{type} = $this->{state};
+            $record->{type} = $this->{state};
+            $record->{start} = $logfh->tell;
+            $record->{stop} = $logfh->tell;
+            $record->{inner_file} = $this->{inner_file};
            } elsif ($this->{state} eq 'kill-end') {
            } elsif ($this->{state} eq 'kill-end') {
+            if ($this->{inner_file}) {
+                $record->{fh} = IO::InnerFile->new($logfh,$record->{start},$record->{stop} - $record->{start})
+            }
                return $record;
            }
 
            next;
        }
                return $record;
            }
 
            next;
        }
-
+    $record->{stop} = $logfh->tell;
        $_ = $line;
        if ($this->{state} eq 'incoming-recv') {
            my $pl = $_;
        $_ = $line;
        if ($this->{state} eq 'incoming-recv') {
            my $pl = $_;
@@ -256,14 +273,14 @@ sub read_record
                die "bad line '$pl' in state incoming-recv";
            }
            $this->{state} = 'go';
                die "bad line '$pl' in state incoming-recv";
            }
            $this->{state} = 'go';
-           $record->{text} .= "$_\n";
+           $record->{text} .= "$_\n" unless $this->{inner_file};
        } elsif ($this->{state} eq 'html') {
        } elsif ($this->{state} eq 'html') {
-           $record->{text} .= "$_\n";
+           $record->{text} .= "$_\n"  unless $this->{inner_file};
        } elsif ($this->{state} eq 'go') {
            s/^\030//;
        } elsif ($this->{state} eq 'go') {
            s/^\030//;
-           $record->{text} .= "$_\n";
+           $record->{text} .= "$_\n"  unless $this->{inner_file};
        } elsif ($this->{state} eq 'go-nox') {
        } elsif ($this->{state} eq 'go-nox') {
-           $record->{text} .= "$_\n";
+           $record->{text} .= "$_\n"  unless $this->{inner_file};
        } elsif ($this->{state} eq 'recips') {
            if (/^-t$/) {
                undef $record->{recips};
        } elsif ($this->{state} eq 'recips') {
            if (/^-t$/) {
                undef $record->{recips};
@@ -273,11 +290,11 @@ sub read_record
            }
            $this->{state} = 'kill-body';
        } elsif ($this->{state} eq 'autocheck') {
            }
            $this->{state} = 'kill-body';
        } elsif ($this->{state} eq 'autocheck') {
-           $record->{text} .= "$_\n";
+           $record->{text} .= "$_\n" unless $this->{inner_file};
            next if !/^X-Debian-Bugs(-\w+)?: This is an autoforward from (\S+)/;
            $this->{state} = 'autowait';
        } elsif ($this->{state} eq 'autowait') {
            next if !/^X-Debian-Bugs(-\w+)?: This is an autoforward from (\S+)/;
            $this->{state} = 'autowait';
        } elsif ($this->{state} eq 'autowait') {
-           $record->{text} .= "$_\n";
+           $record->{text} .= "$_\n" unless $this->{inner_file};
            next if !/^$/;
            $this->{state} = 'go-nox';
        } else {
            next if !/^$/;
            $this->{state} = 'go-nox';
        } else {
@@ -299,14 +316,39 @@ Takes a .log filehandle as input, and returns an array of all records in
 that file. Throws exceptions using die(), so you may want to wrap this in an
 eval().
 
 that file. Throws exceptions using die(), so you may want to wrap this in an
 eval().
 
+Uses exactly the same options as Debbugs::Log::new
+
 =cut
 
 =cut
 
-sub read_log_records (*)
+sub read_log_records
 {
 {
-    my $logfh = shift;
+    my %param;
+    if (@_ == 1) {
+        ($param{logfh}) = @_;
+    }
+    else {
+        %param = validate_with(params => \@_,
+                               spec   => {bug_num => {type => SCALAR,
+                                                      optional => 1,
+                                                     },
+                                          logfh   => {type => HANDLE,
+                                                      optional => 1,
+                                                     },
+                                          log_name => {type => SCALAR,
+                                                       optional => 1,
+                                                      },
+                           inner_file => {type => BOOLEAN,
+                                          default => 0,
+                                         },
+                                         }
+                              );
+    }
+    if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) ne 1) {
+        croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined";
+    }
 
     my @records;
 
     my @records;
-    my $reader = Debbugs::Log->new($logfh);
+    my $reader = Debbugs::Log->new(%param);
     while (defined(my $record = $reader->read_record())) {
        push @records, $record;
     }
     while (defined(my $record = $reader->read_record())) {
        push @records, $record;
     }
@@ -318,38 +360,92 @@ sub read_log_records (*)
 Takes a filehandle and a list of records as input, and prints the .log
 format representation of those records to that filehandle.
 
 Takes a filehandle and a list of records as input, and prints the .log
 format representation of those records to that filehandle.
 
+=back
+
 =cut
 
 =cut
 
-sub write_log_records (*@)
+sub write_log_records
 {
 {
-    my $logfh = shift;
-    my @records = @_;
+    my %param = validate_with(params => \@_,
+                             spec   => {bug_num => {type => SCALAR,
+                                                    optional => 1,
+                                                   },
+                                        logfh   => {type => HANDLE,
+                                                    optional => 1,
+                                                   },
+                                        log_name => {type => SCALAR,
+                                                     optional => 1,
+                                                    },
+                                        records => {type => HASHREF|ARRAYREF,
+                                                   },
+                                       },
+                            );
+    if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) ne 1) {
+        croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined";
+    }
+    my $logfh;
+    if (exists $param{logfh}) {
+        $logfh = $param{logfh}
+    }
+    elsif (exists $param{log_name}) {
+        $logfh = IO::File->new(">>$param{log_name}") or
+             die "Unable to open bug log $param{log_name} for writing: $!";
+    }
+    elsif (exists $param{bug_num}) {
+        my $location = getbuglocation($param{bug_num},'log');
+        my $bug_log = getbugcomponent($param{bug_num},'log',$location);
+        $logfh = IO::File->new($bug_log, 'r') or
+             die "Unable to open bug log $bug_log for reading: $!";
+    }
+    my @records = make_list($param{records});
 
     for my $record (@records) {
        my $type = $record->{type};
 
     for my $record (@records) {
        my $type = $record->{type};
-       my ($text) = escape_log($record->{text});
-       die "type '$type' with no text field" unless defined $text;
+       croak "record type '$type' with no text field" unless defined $record->{text};
+       # I am not sure if we really want to croak here; but this is
+       # almost certainly a bug if is_utf8 is on.
+        my $text = $record->{text};
+        if (is_utf8($text)) {
+            carp('Record text was in the wrong encoding (perl internal instead of utf8 octets)');
+            $text = encode_utf8($text)
+        }
+       ($text) = escape_log($text);
        if ($type eq 'autocheck') {
        if ($type eq 'autocheck') {
-           print $logfh "\01\n$text\03\n";
+           print {$logfh} "\01\n$text\03\n" or
+               die "Unable to write to logfile: $!";
        } elsif ($type eq 'recips') {
        } elsif ($type eq 'recips') {
-           print $logfh "\02\n";
+           print {$logfh} "\02\n";
            my $recips = $record->{recips};
            if (defined $recips) {
            my $recips = $record->{recips};
            if (defined $recips) {
-               die "recips not undef or array"
+               croak "recips not undef or array"
                    unless ref($recips) eq 'ARRAY';
                    unless ref($recips) eq 'ARRAY';
-               print $logfh join("\04", @$recips) . "\n";
+                my $wrong_encoding = 0;
+                my @recips =
+                    map { if (is_utf8($_)) {
+                        $wrong_encoding=1;
+                        encode_utf8($_);
+                    } else {
+                        $_;
+                    }} @$recips;
+                carp('Recipients was in the wrong encoding (perl internal instead of utf8 octets') if $wrong_encoding;
+               print {$logfh} join("\04", @$recips) . "\n" or
+                   die "Unable to write to logfile: $!";
            } else {
            } else {
-               print $logfh "-t\n";
+               print {$logfh} "-t\n" or
+                   die "Unable to write to logfile: $!";
            }
            #$text =~ s/^([\01-\07\030])/\030$1/gm;
            }
            #$text =~ s/^([\01-\07\030])/\030$1/gm;
-           print $logfh "\05\n$text\03\n";
+           print {$logfh} "\05\n$text\03\n" or
+               die "Unable to write to logfile: $!";
        } elsif ($type eq 'html') {
        } elsif ($type eq 'html') {
-           print $logfh "\06\n$text\03\n";
+           print {$logfh} "\06\n$text\03\n" or
+               die "Unable to write to logfile: $!";
        } elsif ($type eq 'incoming-recv') {
            #$text =~ s/^([\01-\07\030])/\030$1/gm;
        } elsif ($type eq 'incoming-recv') {
            #$text =~ s/^([\01-\07\030])/\030$1/gm;
-           print $logfh "\07\n$text\03\n";
+           print {$logfh} "\07\n$text\03\n" or
+               die "Unable to write to logfile: $!";
        } else {
        } else {
-           die "unknown type '$type'";
+           croak "unknown record type type '$type'";
        }
     }
 
        }
     }
 
@@ -366,11 +462,44 @@ Applies the log escape regex to the passed logfile.
 
 sub escape_log {
        my @log = @_;
 
 sub escape_log {
        my @log = @_;
-       return map { s/^([\01-\07\030])/\030$1/gm; $_ } @log;
+       return map {s/^([\01-\07\030])/\030$1/gm; $_ } @log;
 }
 
 
 }
 
 
-=back
+sub record_text {
+    my ($record) = @_;
+    if ($record->{inner_file}) {
+        local $/;
+        my $text;
+        my $t = $record->{fh};
+        $text = <$t>;
+        $record->{fh}->seek(0,0);
+        return $text;
+    } else {
+        return $record->{text};
+    }
+}
+
+sub record_regex {
+    my ($record,$regex) = @_;
+    if ($record->{inner_file}) {
+        my @result;
+        my $fh = $record->{fh};
+        while (<$fh>) {
+            if (@result = $_ =~ m/$regex/) {
+                $record->{fh}->seek(0,0);
+                return @result;
+            }
+        }
+        $record->{fh}->seek(0,0);
+        return ();
+    } else {
+        my @result = $record->{text} =~ m/$regex/;
+        return @result;
+        return $record->{text};
+    }
+}
+
 
 =head1 CAVEATS
 
 
 =head1 CAVEATS