X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=Debbugs%2FEstraier.pm;h=5987fe9ec5ddf1f5485157fa407b5444573fb0c1;hb=9ba439f0dec2d3980de21b01d5f335a9d213158f;hp=86963ee334248a798e93a7c9d3d77e0ade867f3a;hpb=f3a25fc7566f8a4a98996a0e01fcb03dc0adf72b;p=debbugs.git diff --git a/Debbugs/Estraier.pm b/Debbugs/Estraier.pm index 86963ee..5987fe9 100644 --- a/Debbugs/Estraier.pm +++ b/Debbugs/Estraier.pm @@ -1,3 +1,9 @@ +# This module is part of debbugs, and is released +# under the terms of the GPL version 2, or any later +# version at your option. +# See the file README and COPYING for more information. +# +# Copyright 2007 by Don Armstrong . package Debbugs::Estraier; @@ -24,11 +30,11 @@ use strict; use vars qw($VERSION $DEBUG %EXPORT_TAGS @EXPORT_OK @EXPORT); use base qw(Exporter); use Debbugs::Log; -#use Params::Validate; use Search::Estraier; -use Date::Manip; -use Debbugs::Common qw(getbuglocation getbugcomponent readbug); - +use Debbugs::Common qw(getbuglocation getbugcomponent make_list); +use Debbugs::Status qw(readbug); +use Debbugs::MIME qw(parse); +use Encode qw(encode_utf8); BEGIN{ ($VERSION) = q$Revision: 1.3 $ =~ /^Revision:\s+([^\s+])/; @@ -69,9 +75,39 @@ sub add_bug_log{ my ($msg_id) = $record->{text} =~ /^Message-Id:\s+<(.+)>/im; next if defined $msg_id and exists $seen_msg_ids{$msg_id}; $seen_msg_ids{$msg_id} = 1 if defined $msg_id; - next if $msg_id =~ /handler\..+\.ack(?:info)?\@/; + next if defined $msg_id and $msg_id =~ /handler\..+\.ack(?:info)?\@/; add_bug_message($est,$record->{text},$bug_num,$msg_num,$status) } + return $msg_num; +} + +=head2 remove_old_message + + remove_old_message($est,300000,50); + +Removes all messages which are no longer in the log + +=cut + +sub remove_old_messages{ + my ($est,$bug_num,$max_message) = @_; + # remove records which are no longer present in the log (uri > $msg_num) + my $cond = new Search::Estraier::Condition; + $cond->add_attr('@uri STRBW '.$bug_num.'/'); + $cond->set_max(50); + my $skip; + my $nres; + while ($nres = $est->search($cond,0) and $nres->doc_num > 0){ + for my $rdoc (map {$nres->get_doc($_)} 0..($nres->doc_num-1)) { + my $uri = $rdoc->uri; + my ($this_message) = $uri =~ m{/(\d+)$}; + next unless $this_message > $max_message; + $est->out_doc_by_uri($uri); + } + last unless $nres->doc_num >= $cond->max; + $cond->set_skip($cond->skip+$cond->max); + } + } sub add_bug_message{ @@ -82,7 +118,9 @@ sub add_bug_message{ my $uri = "$bug_num/$msg_num"; $doc = $est->get_doc_by_uri($uri); $doc = new Search::Estraier::Document if not defined $doc; - $doc->add_text($bug_message); + + my $message = parse($bug_message); + $doc->add_text(encode_utf8(join("\n",make_list(values %{$message})))); # * @id : the ID number determined automatically when the document is registered. # * @uri : the location of a document which any document should have. @@ -101,17 +139,17 @@ sub add_bug_message{ my @attr = qw(status subject date submitter package tags severity); # parse the date my ($date) = $bug_message =~ /^Date:\s+(.+?)\s*$/mi; - $doc->add_attr('@cdate' => $date); + $doc->add_attr('@cdate' => encode_utf8($date)) if defined $date; # parse the title my ($subject) = $bug_message =~ /^Subject:\s+(.+?)\s*$/mi; - $doc->add_attr('@title' => $subject); + $doc->add_attr('@title' => encode_utf8($subject)) if defined $subject; # parse the author my ($author) = $bug_message =~ /^From:\s+(.+?)\s*$/mi; - $doc->add_attr('@author' => $author); + $doc->add_attr('@author' => encode_utf8($author)) if defined $author; # create the uri - $doc->add_attr('@uri' => $uri); + $doc->add_attr('@uri' => encode_utf8($uri)); foreach my $attr (@attr) { - $doc->add_attr($attr => $status->{$attr}); + $doc->add_attr($attr => encode_utf8($status->{$attr})) if defined $status->{$attr}; } print STDERR "adding $uri\n" if $DEBUG; # Try a bit harder if estraier is returning timeouts