From: cjwatson <> Date: Mon, 25 Jul 2005 16:18:02 +0000 (-0800) Subject: [project @ 2005-07-25 09:18:02 by cjwatson] X-Git-Tag: release/2.6.0~683 X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=986cf168bf54acfa5e0dd70fb6c316fcd2c1c78a;p=debbugs.git [project @ 2005-07-25 09:18:02 by cjwatson] Add an incremental read_record interface to allow processing records one at a time rather than having to slurp them all into memory. --- diff --git a/Debbugs/Log.pm b/Debbugs/Log.pm index 5889fdc..c9a9e30 100644 --- a/Debbugs/Log.pm +++ b/Debbugs/Log.pm @@ -83,6 +83,17 @@ different format.) No other types of records are permitted, and the file must end with a ^C line. +=cut + +my %states = ( + 1 => 'autocheck', + 2 => 'recips', + 3 => 'kill-end', + 5 => 'go', + 6 => 'html', + 7 => 'incoming-recv', +); + =head2 Perl Record Representation Each record is a hash. The C field is C, C, @@ -94,99 +105,129 @@ C<[html]> as above; C is a reference to an array of recipients =over 4 -=item read_log_records +=item new -Takes a .log filehandle as input, and returns an array of all records in -that file. Throws exceptions using die(), so you may want to wrap this in an -eval(). +Creates a new log reader based on a .log filehandle. =cut -sub read_log_records (*) +sub new { - my $logfh = shift; + my $this = shift; + my $class = ref($this) || $this; + my $self = {}; + bless $self, $class; + $self->{logfh} = shift; + $self->{state} = 'kill-init'; + $self->{linenum} = 0; + return $self; +} - # This comes from bugreport.cgi, but is much simpler since it doesn't - # worry about the details of output. +=item read_record - my %states = ( - 1 => 'autocheck', - 2 => 'recips', - 3 => 'kill-end', - 5 => 'go', - 6 => 'html', - 7 => 'incoming-recv', - ); +Reads and returns a single record from a log reader object. At end of file, +returns undef. Throws exceptions using die(), so you may want to wrap this +in an eval(). - my @records; +=cut + +sub read_record +{ + my $this = shift; + my $logfh = $this->{logfh}; + + # This comes from bugreport.cgi, but is much simpler since it doesn't + # worry about the details of output. - my $normstate = 'kill-init'; - my $linenum = 0; my $record = {}; while (defined (my $line = <$logfh>)) { chomp $line; - ++$linenum; + ++$this->{linenum}; if (length($line) == 1 and exists $states{ord($line)}) { # state transitions my $newstate = $states{ord($line)}; # disallowed transitions - $_ = "$normstate $newstate"; + $_ = "$this->{state} $newstate"; unless (/^(go|go-nox|html) kill-end$/ or /^(kill-init|kill-end) (incoming-recv|autocheck|recips|html)$/ or /^kill-body go$/) { - die "transition from $normstate to $newstate at $linenum disallowed"; + die "transition from $this->{state} to $newstate at $this->{linenum} disallowed"; } - if ($newstate =~ /^(autocheck|recips|html|incoming-recv)$/) { - $record->{type} = $newstate; - } elsif ($newstate eq 'kill-end') { - push @records, $record; - $record = {}; + $this->{state} = $newstate; + + if ($this->{state} =~ /^(autocheck|recips|html|incoming-recv)$/) { + $record->{type} = $this->{state}; + } elsif ($this->{state} eq 'kill-end') { + return $record; } - $normstate = $newstate; next; } $_ = $line; - if ($normstate eq 'incoming-recv') { + if ($this->{state} eq 'incoming-recv') { my $pl = $_; unless (/^Received: \(at \S+\) by \S+;/) { die "bad line '$pl' in state incoming-recv"; } - $normstate = 'go'; + $this->{state} = 'go'; $record->{text} .= "$_\n"; - } elsif ($normstate eq 'html') { + } elsif ($this->{state} eq 'html') { $record->{text} .= "$_\n"; - } elsif ($normstate eq 'go') { + } elsif ($this->{state} eq 'go') { s/^\030//; $record->{text} .= "$_\n"; - } elsif ($normstate eq 'go-nox') { + } elsif ($this->{state} eq 'go-nox') { $record->{text} .= "$_\n"; - } elsif ($normstate eq 'recips') { + } elsif ($this->{state} eq 'recips') { if (/^-t$/) { undef $record->{recips}; } else { # preserve trailing null fields, e.g. #2298 $record->{recips} = [split /\04/, $_, -1]; } - $normstate = 'kill-body'; - } elsif ($normstate eq 'autocheck') { + $this->{state} = 'kill-body'; + } elsif ($this->{state} eq 'autocheck') { $record->{text} .= "$_\n"; next if !/^X-Debian-Bugs(-\w+)?: This is an autoforward from (\S+)/; - $normstate = 'autowait'; - } elsif ($normstate eq 'autowait') { + $this->{state} = 'autowait'; + } elsif ($this->{state} eq 'autowait') { $record->{text} .= "$_\n"; next if !/^$/; - $normstate = 'go-nox'; + $this->{state} = 'go-nox'; } else { - die "state $normstate at line $linenum ('$_')"; + die "state $this->{state} at line $this->{linenum} ('$_')"; } } - die "state $normstate at end" unless $normstate eq 'kill-end'; + die "state $this->{state} at end" unless $this->{state} eq 'kill-end'; + + if (keys %$record) { + return $record; + } else { + return undef; + } +} + +=item read_log_records + +Takes a .log filehandle as input, and returns an array of all records in +that file. Throws exceptions using die(), so you may want to wrap this in an +eval(). + +=cut +sub read_log_records (*) +{ + my $logfh = shift; + + my @records; + my $reader = Debbugs::Log->new($logfh); + while (defined(my $record = $reader->read_record())) { + push @records, $record; + } return @records; }