From: Don Armstrong <don@archimedes>
Date: Fri, 20 Oct 2006 04:42:10 +0000 (-0700)
Subject:  * Use MLDBM DB_File Storable in gen-indices and cgi/common.pl
X-Git-Tag: release/2.6.0~592^2
X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=8cf1e74aa43fafaed6ccd78778e64fd7592af687;p=debbugs.git

 * Use MLDBM DB_File Storable in gen-indices and cgi/common.pl
 * Add a reverse index to allow incremental updates of bugs
 * Add pod2usage documentation of gen-indices
 * Stop using pack et al. and just let Storable transparently
   handle everything
---

diff --git a/cgi/common.pl b/cgi/common.pl
index 82f3ef37..e497787f 100644
--- a/cgi/common.pl
+++ b/cgi/common.pl
@@ -3,7 +3,7 @@
 use DB_File;
 use Fcntl qw/O_RDONLY/;
 use Mail::Address;
-use MLDBM qw/DB_File/;
+use MLDBM qw(DB_File Storable);
 use POSIX qw/ceil/;
 
 use URI::Escape;
@@ -697,21 +697,13 @@ sub getbugs {
     if (defined $fastidx && -e $fastidx) {
         my %lookup;
 print STDERR "optimized\n" if ($debug);
-        tie %lookup, DB_File => $fastidx, O_RDONLY
+        tie %lookup, MLDBM => $fastidx, O_RDONLY
             or die "$0: can't open $fastidx ($!)\n";
 	while ($key = shift) {
             my $bugs = $lookup{$key};
             if (defined $bugs) {
-                push @result, (unpack 'N*', $bugs);
-            } elsif (defined $lookup{"count $key"}) {
-		my $which = 0;
-		while (1) {
-		    $bugs = $lookup{"$which $key"};
-		    last unless defined $bugs;
-		    push @result, (unpack 'N*', $bugs);
-		    $which += 100;
-		}
-	    }
+		 push @result, keys %{$bugs};
+            }
         }
 	untie %lookup;
 print STDERR "done optimized\n" if ($debug);
diff --git a/scripts/gen-indices.in b/scripts/gen-indices.in
index 05136f66..cf7f93be 100755
--- a/scripts/gen-indices.in
+++ b/scripts/gen-indices.in
@@ -7,12 +7,16 @@
 #use strict;
 
 use DB_File;
+use MLDBM qw(DB_File Storable); # backend module name is case-sensitive
 use Fcntl qw/O_RDWR O_CREAT O_TRUNC/;
 use File::Copy;
 
 use Getopt::Long;
 use Pod::Usage;
 
+use warnings;
+use strict;
+
 use File::stat;
 use List::Util qw(min);
 
@@ -25,6 +29,7 @@ gen-indices - Generates index files for the cgi scripts
  gen-indices [options]
 
  Options:
+  --index-path path to index location
   --quick update changed bugs
   --debug, -d debugging level (Default 0)
   --help, -h display this help
@@ -62,33 +67,51 @@ my %options = (debug           => 0,
 	       help            => 0,
 	       man             => 0,
 	       quick           => 0,
+	       index_path      => undef,
 	       );
 
-GetOptions(\%options,'quick!','debug|d+','help|h|?','man|m') or pod2usage(2);
-
+GetOptions(\%options,'quick!','index_path|index-path=s','debug|d+','help|h|?','man|m') or pod2usage(2);
+pod2usage(1) if $options{help};
+pod2usage(-verbose=>2) if $options{man};
 
+{ no warnings;
+  no strict;
 require '/etc/debbugs/config';
 require '/org/bugs.debian.org/scripts/errorlib';
+}
 
 chdir('/org/bugs.debian.org/spool') or die "chdir spool: $!\n";
 
 my $verbose = $options{debug};
-my $indexdest = "/org/bugs.debian.org/spool";
+my $indexdest = $options{index_path} || "/org/bugs.debian.org/spool";
 
 my $initialdir = "db-h";
 my $suffix = "";
 
-if ($ARGV[0] eq "archive") {
+if (defined $ARGV[0] and $ARGV[0] eq "archive") {
     $initialdir = "archive";
     $suffix = "-arc";
 }
 
-my @indexes = ('package', 'tag', 'severity', 'submitter-email');
-my %index = ();
+# NB: The reverse index is special; it's used to clean up during updates to bugs
+my @indexes = ('package', 'tag', 'severity', 'submitter-email','reverse');
+# $indexes points at whichever set of per-index hashes addbugtoindex should
+# write to; it defaults to the on-disk (tied) set.
+my %slow_index = ();
+my %fast_index = ();
+my $indexes = \%slow_index;
+if (not $options{quick}) {
+     # We'll trade memory for speed here if we're not doing a quick rebuild:
+     # collect everything in plain in-memory hashes first.
+     for my $name (@indexes) {
+	  $fast_index{$name} = {};
+     }
+     $indexes = \%fast_index;
+}
 my $time = undef;
 my $start_time = time;
 for my $i (@indexes) {
-	%{$index{$i}} = {};
+	$slow_index{$i} = {};
 	if ($options{quick}) {
 	     if (-e "$indexdest/by-$i${suffix}.idx") {
 		  system('cp','-a',"$indexdest/by-$i${suffix}.idx","$indexdest/by-$i${suffix}.idx.new") == 0
@@ -96,12 +119,12 @@ for my $i (@indexes) {
 		  my $stat = stat("$indexdest/by-$i${suffix}.idx") or die "Unable to stat $indexdest/by-$i${suffix}.idx";
 		  $time = defined $time ? min($time,$stat->mtime) : $stat->mtime;
 	     }
-	     tie %{$index{$i}}, DB_File => "$indexdest/by-$i$suffix.idx.new",
+	     tie %{$slow_index{$i}}, MLDBM => "$indexdest/by-$i$suffix.idx.new",
 		  O_RDWR|O_CREAT, 0666
 		       or die "$0: can't create by-$i$suffix-idx.new: $!";
 	}
 	else {
-	     tie %{$index{$i}}, DB_File => "$indexdest/by-$i$suffix.idx.new",
+	     tie %{$slow_index{$i}}, MLDBM => "$indexdest/by-$i$suffix.idx.new",
 		  O_RDWR|O_CREAT|O_TRUNC, 0666
 		       or die "$0: can't create by-$i$suffix-idx.new: $!";
 
@@ -110,16 +133,26 @@ for my $i (@indexes) {
 }
 
 sub addbugtoindex {
-	my ($i, $k, $bug) = @_;
-
-	my $cnt = 0;
-	if (exists $index{$i}->{"count $k"}) {
-		$cnt = unpack 'N', $index{$i}->{"count $k"};
-	}
-	$index{$i}->{"count $k"} = (pack 'N', 1+$cnt);
-	my $which = $cnt - ($cnt % 100);
-	$index{$i}->{"$which $k"} = '' unless defined $index{$i}->{"$which $k"};
-	$index{$i}->{"$which $k"} .= (pack 'N', $bug);
+     my ($index, $bug, @values) = @_;
+     # (Re)index $bug under each of @values in $index, dropping stale entries.
+     if (exists $indexes->{reverse}{"$index $bug"}) {
+	  # A tied MLDBM hash can't change nested data in place: fetch, edit, store.
+	  for my $key (@{$indexes->{reverse}{"$index $bug"}}) {
+	       my $temp = $indexes->{$index}{$key};
+	       delete $temp->{$bug};
+	       $indexes->{$index}{$key} = $temp;
+	       $indexes->{$index}{"count $key"}--;
+	  }
+	  delete $indexes->{reverse}{"$index $bug"};
+     }
+     for my $key (@values) {
+	  $indexes->{$index}->{"count $key"}++;
+	  # Same MLDBM limitation: copy out, change the copy, assign it back whole.
+	  my $temp = $indexes->{$index}->{$key};
+	  $temp->{$bug} = 1;
+	  $indexes->{$index}->{$key} = $temp;
+     }
+     $indexes->{reverse}{"$index $bug"} = [@values]; # lets the next update undo this
 }
 
 sub emailfromrfc822 {
@@ -129,7 +162,7 @@ sub emailfromrfc822 {
 	return $email;
 }
 
-#my $cnt = 0;
+my $cnt = 0;
 
 my @dirs = ($initialdir);
 while (my $dir = shift @dirs) {
@@ -142,25 +175,32 @@ while (my $dir = shift @dirs) {
 	my @list = map { m/^(\d+)\.summary$/?($1):() } @subdirs;
 	push @dirs, map { m/^(\d+)$/ && -d "$dir/$1"?("$dir/$1"):() } @subdirs;
 
-	for my $f (@list) {
+	for my $bug (@list) {
 		print "Up to $cnt bugs...\n" if (++$cnt % 100 == 0 && $verbose);
-		my $stat = stat(getbugcomponent($f,'summary'));
+		my $stat = stat(getbugcomponent($bug,'summary'));
 		next if $stat->mtime < $time;
-		my $fdata = readbug($f, $initialdir);
-		for my $p (split /[\s,]+/, $fdata->{"package"}) {
-		  addbugtoindex("package", $p, $f);
-		}
-		for my $t (split /[\s,]+/, $fdata->{"keywords"}) {
-		  addbugtoindex("tag", $t, $f);
-		}
-		addbugtoindex('submitter-email', 
-			emailfromrfc822($fdata->{"originator"}), $f);
-		addbugtoindex("severity", $fdata->{"severity"}, $f);
+		my $fdata = readbug($bug, $initialdir);
+		addbugtoindex("package", $bug, split /[\s,]+/, $fdata->{"package"});
+		addbugtoindex("tag", $bug, split /[\s,]+/, $fdata->{"keywords"});
+		addbugtoindex('submitter-email', $bug,
+			      emailfromrfc822($fdata->{"originator"}));
+		addbugtoindex("severity", $bug, $fdata->{"severity"});
 	}
 }
 
+# Full (non-quick) rebuild: copy the in-memory index into the slow index.
+unless ($options{quick}) {
+     for my $name (keys %fast_index) {
+	  my $entries = $fast_index{$name};
+	  # Store each entry with a single top-level assignment.
+	  $slow_index{$name}{$_} = $entries->{$_} for keys %{$entries};
+	  print "Dealt with index $name\n" if $verbose;
+     }
+}
+
+
 for my $i (@indexes) {
-	untie %{$indexes{$i}};
+	untie %{$slow_index{$i}};
 	move("$indexdest/by-$i$suffix.idx.new", "$indexdest/by-$i$suffix.idx");
 	system('touch','-d',"1/1/1970 + ${start_time}secs","$indexdest/by-$i$suffix.idx");
 }