X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=Debbugs%2FLog.pm;h=2531a6d5c22e017d3dcc8db9ba07e9029825a2fd;hb=1b1562614656acf70e9b69c8eb736673f370c816;hp=a28384de59bd5ddaf9ce41d8b32f7b8dbd504779;hpb=da0e33deed876669ce5112d02797269c7a65b19e;p=debbugs.git diff --git a/Debbugs/Log.pm b/Debbugs/Log.pm index a28384d..2531a6d 100644 --- a/Debbugs/Log.pm +++ b/Debbugs/Log.pm @@ -16,7 +16,7 @@ use warnings; use strict; use vars qw($VERSION $DEBUG @EXPORT @EXPORT_OK %EXPORT_TAGS); -use base qw(Exporter); +use Exporter qw(import); BEGIN { $VERSION = 1.00; @@ -25,7 +25,7 @@ BEGIN { @EXPORT = (); %EXPORT_TAGS = (write => [qw(write_log_records), ], - read => [qw(read_log_records), + read => [qw(read_log_records record_text record_regex), ], misc => [qw(escape_log), ], @@ -39,7 +39,9 @@ use Carp; use Debbugs::Common qw(getbuglocation getbugcomponent make_list); use Params::Validate qw(:types validate_with); -use Encode qw(encode); +use Encode qw(encode encode_utf8 is_utf8); +use IO::InnerFile; +use feature 'state'; =head1 NAME @@ -51,6 +53,11 @@ The Debbugs::Log module provides a convenient way for scripts to read and write the .log files used by debbugs to store the complete textual records of all bug transactions. +Debbugs::Log does not decode utf8 into perl's internal encoding or +encode into utf8 from perl's internal encoding. For html records and +all recips, this should probably be done. For other records, this should +not be needed. + =head2 The .log File Format .log files consist of a sequence of records, of one of the following four @@ -164,23 +171,31 @@ sub new my %param; if (@_ == 1) { ($param{logfh}) = @_; + $param{inner_file} = 0; } else { - %param = validate_with(params => \@_, - spec => {bug_num => {type => SCALAR, - optional => 1, - }, - logfh => {type => HANDLE, - optional => 1, - }, - log_name => {type => SCALAR, - optional => 1, - }, - } - ); + state $spec = + {bug_num => {type => SCALAR, + optional => 1, + }, + logfh => {type => HANDLE, + optional => 1, + }, + log_name => {type => SCALAR, + optional => 1, + }, + inner_file => {type => BOOLEAN, + default => 0, + }, + }; + %param = validate_with(params => \@_, + spec => $spec, + ); } - if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) ne 1) { - croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined"; + if (grep({exists $param{$_} and defined $param{$_}} + qw(bug_num logfh log_name)) ne 1) { + croak "Exactly one of bug_num, logfh, or log_name ". + "must be passed and must be defined"; } my $class = ref($this) || $this; @@ -188,21 +203,30 @@ sub new bless $self, $class; if (exists $param{logfh}) { - $self->{logfh} = $param{logfh} - } - elsif (exists $param{log_name}) { - $self->{logfh} = IO::File->new($param{log_name},'r') or - die "Unable to open bug log $param{log_name} for reading: $!"; - } - elsif (exists $param{bug_num}) { - my $location = getbuglocation($param{bug_num},'log'); - my $bug_log = getbugcomponent($param{bug_num},'log',$location); - $self->{logfh} = IO::File->new($bug_log, 'r') or - die "Unable to open bug log $bug_log for reading: $!"; + $self->{logfh} = $param{logfh} + } else { + my $bug_log; + if (exists $param{bug_num}) { + my $location = getbuglocation($param{bug_num},'log'); + $bug_log = getbugcomponent($param{bug_num},'log',$location); + } else { + $bug_log = $param{log_name}; + } + if ($bug_log =~ m/\.gz$/) { + my $oldpath = $ENV{'PATH'}; + $ENV{'PATH'} = '/bin:/usr/bin'; + open($self->{logfh},'-|','gzip','-dc',$bug_log) or + die "Unable to open $bug_log for reading: $!"; + $ENV{'PATH'} = $oldpath; + } else { + open($self->{logfh},'<',$bug_log) or + die "Unable to open $bug_log for reading: $!"; + } } $self->{state} = 'kill-init'; $self->{linenum} = 0; + $self->{inner_file} = $param{inner_file}; return $self; } @@ -240,16 +264,21 @@ sub read_record } $this->{state} = $newstate; - if ($this->{state} =~ /^(autocheck|recips|html|incoming-recv)$/) { - $record->{type} = $this->{state}; + $record->{type} = $this->{state}; + $record->{start} = $logfh->tell; + $record->{stop} = $logfh->tell; + $record->{inner_file} = $this->{inner_file}; } elsif ($this->{state} eq 'kill-end') { + if ($this->{inner_file}) { + $record->{fh} = IO::InnerFile->new($logfh,$record->{start},$record->{stop} - $record->{start}) + } return $record; } next; } - + $record->{stop} = $logfh->tell; $_ = $line; if ($this->{state} eq 'incoming-recv') { my $pl = $_; @@ -257,14 +286,14 @@ sub read_record die "bad line '$pl' in state incoming-recv"; } $this->{state} = 'go'; - $record->{text} .= "$_\n"; + $record->{text} .= "$_\n" unless $this->{inner_file}; } elsif ($this->{state} eq 'html') { - $record->{text} .= "$_\n"; + $record->{text} .= "$_\n" unless $this->{inner_file}; } elsif ($this->{state} eq 'go') { s/^\030//; - $record->{text} .= "$_\n"; + $record->{text} .= "$_\n" unless $this->{inner_file}; } elsif ($this->{state} eq 'go-nox') { - $record->{text} .= "$_\n"; + $record->{text} .= "$_\n" unless $this->{inner_file}; } elsif ($this->{state} eq 'recips') { if (/^-t$/) { undef $record->{recips}; @@ -273,12 +302,15 @@ sub read_record $record->{recips} = [split /\04/, $_, -1]; } $this->{state} = 'kill-body'; + $record->{start} = $logfh->tell+2; + $record->{stop} = $logfh->tell+2; + $record->{inner_file} = $this->{inner_file}; } elsif ($this->{state} eq 'autocheck') { - $record->{text} .= "$_\n"; + $record->{text} .= "$_\n" unless $this->{inner_file}; next if !/^X-Debian-Bugs(-\w+)?: This is an autoforward from (\S+)/; $this->{state} = 'autowait'; } elsif ($this->{state} eq 'autowait') { - $record->{text} .= "$_\n"; + $record->{text} .= "$_\n" unless $this->{inner_file}; next if !/^$/; $this->{state} = 'go-nox'; } else { @@ -321,6 +353,9 @@ sub read_log_records log_name => {type => SCALAR, optional => 1, }, + inner_file => {type => BOOLEAN, + default => 0, + }, } ); } @@ -383,7 +418,14 @@ sub write_log_records for my $record (@records) { my $type = $record->{type}; croak "record type '$type' with no text field" unless defined $record->{text}; - my ($text) = escape_log($record->{text}); + # I am not sure if we really want to croak here; but this is + # almost certainly a bug if is_utf8 is on. + my $text = $record->{text}; + if (is_utf8($text)) { + carp('Record text was in the wrong encoding (perl internal instead of utf8 octets)'); + $text = encode_utf8($text) + } + ($text) = escape_log($text); if ($type eq 'autocheck') { print {$logfh} "\01\n$text\03\n" or die "Unable to write to logfile: $!"; @@ -393,6 +435,15 @@ sub write_log_records if (defined $recips) { croak "recips not undef or array" unless ref($recips) eq 'ARRAY'; + my $wrong_encoding = 0; + my @recips = + map { if (is_utf8($_)) { + $wrong_encoding=1; + encode_utf8($_); + } else { + $_; + }} @$recips; + carp('Recipients was in the wrong encoding (perl internal instead of utf8 octets') if $wrong_encoding; print {$logfh} join("\04", @$recips) . "\n" or die "Unable to write to logfile: $!"; } else { @@ -427,7 +478,41 @@ Applies the log escape regex to the passed logfile. sub escape_log { my @log = @_; - return map { eval {$_ = encode("utf8",$_,Encode::FB_CROAK)}; s/^([\01-\07\030])/\030$1/gm; $_ } @log; + return map {s/^([\01-\07\030])/\030$1/gm; $_ } @log; +} + + +sub record_text { + my ($record) = @_; + if ($record->{inner_file}) { + local $/; + my $text; + my $t = $record->{fh}; + $text = <$t>; + $record->{fh}->seek(0,0); + return $text; + } else { + return $record->{text}; + } +} + +sub record_regex { + my ($record,$regex) = @_; + if ($record->{inner_file}) { + my @result; + my $fh = $record->{fh}; + while (<$fh>) { + if (@result = $_ =~ m/$regex/) { + $record->{fh}->seek(0,0); + return @result; + } + } + $record->{fh}->seek(0,0); + return (); + } else { + my @result = $record->{text} =~ m/$regex/; + return @result; + } } @@ -439,3 +524,8 @@ simply a means for extracting and rewriting raw records. =cut 1; + +# Local Variables: +# indent-tabs-mode: nil +# cperl-indent-level: 4 +# End: