X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=Debbugs%2FLog.pm;h=710a844164624c37a4e712c5f7830adc86784f71;hb=d076b44e22d1ade88885f04b59db5319597604e3;hp=268958e61a8e95ca3c8e0ca641275ae20e771069;hpb=ece213fb252bd813a0643689242dca211638b7d5;p=debbugs.git diff --git a/Debbugs/Log.pm b/Debbugs/Log.pm index 268958e..710a844 100644 --- a/Debbugs/Log.pm +++ b/Debbugs/Log.pm @@ -11,12 +11,13 @@ package Debbugs::Log; - -use warnings; -use strict; +use Mouse; +use strictures 2; +use namespace::clean; +use v5.10; # for state use vars qw($VERSION $DEBUG @EXPORT @EXPORT_OK %EXPORT_TAGS); -use base qw(Exporter); +use Exporter qw(import); BEGIN { $VERSION = 1.00; @@ -25,7 +26,7 @@ BEGIN { @EXPORT = (); %EXPORT_TAGS = (write => [qw(write_log_records), ], - read => [qw(read_log_records), + read => [qw(read_log_records record_text record_regex), ], misc => [qw(escape_log), ], @@ -37,8 +38,10 @@ BEGIN { use Carp; -use Debbugs::Common qw(getbuglocation getbugcomponent); +use Debbugs::Common qw(getbuglocation getbugcomponent make_list); use Params::Validate qw(:types validate_with); +use Encode qw(encode encode_utf8 is_utf8); +use IO::InnerFile; =head1 NAME @@ -50,6 +53,11 @@ The Debbugs::Log module provides a convenient way for scripts to read and write the .log files used by debbugs to store the complete textual records of all bug transactions. +Debbugs::Log does not decode utf8 into perl's internal encoding or +encode into utf8 from perl's internal encoding. For html records and +all recips, this should probably be done. For other records, this should +not be needed. + =head2 The .log File Format .log files consist of a sequence of records, of one of the following four @@ -157,53 +165,99 @@ One of the above options must be passed. =cut -sub new -{ - my $this = shift; - my %param; - if (@_ == 1) { - ($param{logfh}) = @_; - } - else { - %param = validate_with(params => \@_, - spec => {bug_num => {type => SCALAR, - optional => 1, - }, - logfh => {type => HANDLE, - optional => 1, - }, - log_name => {type => SCALAR, - optional => 1, - }, - } - ); +sub BUILD { + my ($self,$args) = @_; + if (not ($self->_has_bug_num or + $self->_has_logfh or + $self->_has_log_name)) { + croak "Exactly one of bug_num, logfh, or log_name ". + "must be passed and must be defined"; } - if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) ne 1) { - croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined"; +} + +has 'bug_num' => + (is => 'ro', + isa => 'Int', + predicate => '_has_bug_num', + ); + +has 'logfh' => + (is => 'ro', + lazy => 1, + builder => '_build_logfh', + predicate => '_has_logfh', + ); + +sub _build_logfh { + my $self = shift; + my $bug_log = + $self->log_name; + my $log_fh; + if ($bug_log =~ m/\.gz$/) { + my $oldpath = $ENV{'PATH'}; + $ENV{'PATH'} = '/bin:/usr/bin'; + open($log_fh,'-|','gzip','-dc',$bug_log) or + die "Unable to open $bug_log for reading: $!"; + $ENV{'PATH'} = $oldpath; + } else { + open($log_fh,'<',$bug_log) or + die "Unable to open $bug_log for reading: $!"; } + return $log_fh; +} - my $class = ref($this) || $this; - my $self = {}; - bless $self, $class; +has 'log_name' => + (is => 'ro', + isa => 'Str', + lazy => 1, + builder => '_build_log_name', + predicate => '_has_log_name', + ); + +sub _build_log_name { + my $self = shift; + my $location = getbuglocation($self->bug_num,'log'); + return getbugcomponent($self->bug_num,'log',$location); +} - if (exists $param{logfh}) { - $self->{logfh} = $param{logfh} - } - elsif (exists $param{log_name}) { - $self->{logfh} = IO::File->new($param{log_name},'r') or - die "Unable to open bug log $param{log_name} for reading: $!"; - } - elsif (exists $param{bug_num}) { - my $location = getbuglocation($param{bug_num},'log'); - my $bug_log = getbugcomponent($param{bug_num},'log',$location); - $self->{logfh} = IO::File->new($bug_log, 'r') or - die "Unable to open bug log $bug_log for reading: $!"; +has 'inner_file' => + (is => 'ro', + isa => 'Bool', + default => 0, + ); + +has 'state' => + (is => 'ro', + isa => 'Str', + default => 'kill-init', + writer => '_state', + ); + +sub state_transition { + my $self = shift; + my $new_state = shift; + my $old_state = $self->state; + local $_ = "$old_state $new_state"; + unless (/^(go|go-nox|html) kill-end$/ or + /^(kill-init|kill-end) (incoming-recv|autocheck|recips|html)$/ or + /^autocheck autowait$/ or + /^autowait go-nox$/ or + /^recips kill-body$/ or + /^(kill-body|incoming-recv) go$/) { + confess "transition from $old_state to $new_state at $self->linenum disallowed"; } + $self->_state($new_state); +} - $self->{state} = 'kill-init'; - $self->{linenum} = 0; - return $self; +sub increment_linenum { + my $self = shift; + $self->_linenum($self->_linenum+1); } +has '_linenum' => + (is => 'rw', + isa => 'Int', + default => 0, + ); =item read_record @@ -216,7 +270,7 @@ in an eval(). sub read_record { my $this = shift; - my $logfh = $this->{logfh}; + my $logfh = $this->logfh; # This comes from bugreport.cgi, but is much simpler since it doesn't # worry about the details of output. @@ -224,67 +278,68 @@ sub read_record my $record = {}; while (defined (my $line = <$logfh>)) { + $record->{start} = $logfh->tell() if not defined $record->{start}; chomp $line; - ++$this->{linenum}; + $this->increment_linenum; if (length($line) == 1 and exists $states{ord($line)}) { # state transitions - my $newstate = $states{ord($line)}; - - # disallowed transitions - $_ = "$this->{state} $newstate"; - unless (/^(go|go-nox|html) kill-end$/ or - /^(kill-init|kill-end) (incoming-recv|autocheck|recips|html)$/ or - /^kill-body go$/) { - die "transition from $this->{state} to $newstate at $this->{linenum} disallowed"; - } - - $this->{state} = $newstate; - - if ($this->{state} =~ /^(autocheck|recips|html|incoming-recv)$/) { - $record->{type} = $this->{state}; - } elsif ($this->{state} eq 'kill-end') { + $this->state_transition($states{ord($line)}); + if ($this->state =~ /^(autocheck|recips|html|incoming-recv)$/) { + $record->{type} = $this->state; + $record->{start} = $logfh->tell; + $record->{stop} = $logfh->tell; + $record->{inner_file} = $this->inner_file; + } elsif ($this->state eq 'kill-end') { + if ($this->inner_file) { + $record->{fh} = + IO::InnerFile->new($logfh,$record->{start}, + $record->{stop} - $record->{start}) + } return $record; } next; } - + $record->{stop} = $logfh->tell; $_ = $line; - if ($this->{state} eq 'incoming-recv') { + if ($this->state eq 'incoming-recv') { my $pl = $_; unless (/^Received: \(at \S+\) by \S+;/) { die "bad line '$pl' in state incoming-recv"; } - $this->{state} = 'go'; - $record->{text} .= "$_\n"; - } elsif ($this->{state} eq 'html') { - $record->{text} .= "$_\n"; - } elsif ($this->{state} eq 'go') { + $this->state_transition('go'); + $record->{text} .= "$_\n" unless $this->inner_file; + } elsif ($this->state eq 'html') { + $record->{text} .= "$_\n" unless $this->inner_file; + } elsif ($this->state eq 'go') { s/^\030//; - $record->{text} .= "$_\n"; - } elsif ($this->{state} eq 'go-nox') { - $record->{text} .= "$_\n"; - } elsif ($this->{state} eq 'recips') { + $record->{text} .= "$_\n" unless $this->inner_file; + } elsif ($this->state eq 'go-nox') { + $record->{text} .= "$_\n" unless $this->inner_file; + } elsif ($this->state eq 'recips') { if (/^-t$/) { undef $record->{recips}; } else { # preserve trailing null fields, e.g. #2298 $record->{recips} = [split /\04/, $_, -1]; } - $this->{state} = 'kill-body'; - } elsif ($this->{state} eq 'autocheck') { - $record->{text} .= "$_\n"; + $this->state_transition('kill-body'); + $record->{start} = $logfh->tell+2; + $record->{stop} = $logfh->tell+2; + $record->{inner_file} = $this->inner_file; + } elsif ($this->state eq 'autocheck') { + $record->{text} .= "$_\n" unless $this->inner_file; next if !/^X-Debian-Bugs(-\w+)?: This is an autoforward from (\S+)/; - $this->{state} = 'autowait'; - } elsif ($this->{state} eq 'autowait') { - $record->{text} .= "$_\n"; + $this->state_transition('autowait'); + } elsif ($this->state eq 'autowait') { + $record->{text} .= "$_\n" unless $this->inner_file; next if !/^$/; - $this->{state} = 'go-nox'; + $this->state_transition('go-nox'); } else { - die "state $this->{state} at line $this->{linenum} ('$_')"; + die "state $this->state at line $this->linenum ('$_')"; } } - die "state $this->{state} at end" unless $this->{state} eq 'kill-end'; + die "state $this->state at end" unless $this->state eq 'kill-end'; if (keys %$record) { return $record; @@ -293,6 +348,42 @@ sub read_record } } +=item rewind + +Rewinds the Debbugs::Log to the beginning + +=cut + +sub rewind { + my $self = shift; + if ($self->_has_log_name) { + $self->_clear_log_fh; + } else { + $self->log_fh->seek(0); + } + $self->_state('kill-init'); + $self->_linenum(0); +} + +=item read_all_records + +Reads all of the Debbugs::Records + +=cut + +sub read_all_records { + my $self = shift; + if ($self->_linenum != 0) { + $self->rewind; + } + my @records; + while (defined(my $record = $self->read_record())) { + push @records, $record; + } + return @records; +} + + =item read_log_records Takes a .log filehandle as input, and returns an array of all records in @@ -320,6 +411,9 @@ sub read_log_records log_name => {type => SCALAR, optional => 1, }, + inner_file => {type => BOOLEAN, + default => 0, + }, } ); } @@ -344,36 +438,88 @@ format representation of those records to that filehandle. =cut -sub write_log_records (*@) +sub write_log_records { - my $logfh = shift; - my @records = @_; + my %param = validate_with(params => \@_, + spec => {bug_num => {type => SCALAR, + optional => 1, + }, + logfh => {type => HANDLE, + optional => 1, + }, + log_name => {type => SCALAR, + optional => 1, + }, + records => {type => HASHREF|ARRAYREF, + }, + }, + ); + if (grep({exists $param{$_} and defined $param{$_}} qw(bug_num logfh log_name)) ne 1) { + croak "Exactly one of bug_num, logfh, or log_name must be passed and must be defined"; + } + my $logfh; + if (exists $param{logfh}) { + $logfh = $param{logfh} + } + elsif (exists $param{log_name}) { + $logfh = IO::File->new(">>$param{log_name}") or + die "Unable to open bug log $param{log_name} for writing: $!"; + } + elsif (exists $param{bug_num}) { + my $location = getbuglocation($param{bug_num},'log'); + my $bug_log = getbugcomponent($param{bug_num},'log',$location); + $logfh = IO::File->new($bug_log, 'r') or + die "Unable to open bug log $bug_log for reading: $!"; + } + my @records = make_list($param{records}); for my $record (@records) { my $type = $record->{type}; - my ($text) = escape_log($record->{text}); - die "type '$type' with no text field" unless defined $text; + croak "record type '$type' with no text field" unless defined $record->{text}; + # I am not sure if we really want to croak here; but this is + # almost certainly a bug if is_utf8 is on. + my $text = $record->{text}; + if (is_utf8($text)) { + carp('Record text was in the wrong encoding (perl internal instead of utf8 octets)'); + $text = encode_utf8($text) + } + ($text) = escape_log($text); if ($type eq 'autocheck') { - print $logfh "\01\n$text\03\n"; + print {$logfh} "\01\n$text\03\n" or + die "Unable to write to logfile: $!"; } elsif ($type eq 'recips') { - print $logfh "\02\n"; + print {$logfh} "\02\n"; my $recips = $record->{recips}; if (defined $recips) { - die "recips not undef or array" + croak "recips not undef or array" unless ref($recips) eq 'ARRAY'; - print $logfh join("\04", @$recips) . "\n"; + my $wrong_encoding = 0; + my @recips = + map { if (is_utf8($_)) { + $wrong_encoding=1; + encode_utf8($_); + } else { + $_; + }} @$recips; + carp('Recipients was in the wrong encoding (perl internal instead of utf8 octets') if $wrong_encoding; + print {$logfh} join("\04", @$recips) . "\n" or + die "Unable to write to logfile: $!"; } else { - print $logfh "-t\n"; + print {$logfh} "-t\n" or + die "Unable to write to logfile: $!"; } #$text =~ s/^([\01-\07\030])/\030$1/gm; - print $logfh "\05\n$text\03\n"; + print {$logfh} "\05\n$text\03\n" or + die "Unable to write to logfile: $!"; } elsif ($type eq 'html') { - print $logfh "\06\n$text\03\n"; + print {$logfh} "\06\n$text\03\n" or + die "Unable to write to logfile: $!"; } elsif ($type eq 'incoming-recv') { #$text =~ s/^([\01-\07\030])/\030$1/gm; - print $logfh "\07\n$text\03\n"; + print {$logfh} "\07\n$text\03\n" or + die "Unable to write to logfile: $!"; } else { - die "unknown type '$type'"; + croak "unknown record type type '$type'"; } } @@ -390,7 +536,41 @@ Applies the log escape regex to the passed logfile. sub escape_log { my @log = @_; - return map { s/^([\01-\07\030])/\030$1/gm; $_ } @log; + return map {s/^([\01-\07\030])/\030$1/gm; $_ } @log; +} + + +sub record_text { + my ($record) = @_; + if ($record->{inner_file}) { + local $/; + my $text; + my $t = $record->{fh}; + $text = <$t>; + $record->{fh}->seek(0,0); + return $text; + } else { + return $record->{text}; + } +} + +sub record_regex { + my ($record,$regex) = @_; + if ($record->{inner_file}) { + my @result; + my $fh = $record->{fh}; + while (<$fh>) { + if (@result = $_ =~ m/$regex/) { + $record->{fh}->seek(0,0); + return @result; + } + } + $record->{fh}->seek(0,0); + return (); + } else { + my @result = $record->{text} =~ m/$regex/; + return @result; + } } @@ -402,3 +582,8 @@ simply a means for extracting and rewriting raw records. =cut 1; + +# Local Variables: +# indent-tabs-mode: nil +# cperl-indent-level: 4 +# End: