]> git.donarmstrong.com Git - debbugs.git/commitdiff
switch load_packages to use gzip -dc
authorDon Armstrong <don@donarmstrong.com>
Mon, 9 Jan 2017 23:14:37 +0000 (15:14 -0800)
committerDon Armstrong <don@donarmstrong.com>
Mon, 9 Jan 2017 23:14:37 +0000 (15:14 -0800)
 - Update with new schema

Debbugs/DB/Load.pm
bin/debbugs-loadsql

index 4697e62fef7dd9862cfa4fada809262fed1508e2..665431db0eed27cfe602b2a6b1d41e22bc45e73a 100644 (file)
@@ -384,64 +384,123 @@ sub load_debinfo {
 
 =cut
 
-sub load_package {
-    my ($schema,$suite,$component,$arch,$pkg) = @_;
-    if ($arch eq 'source') {
-       my $sp = $schema->resultset('SrcPkg')->find_or_create({pkg => $pkg->{Package}});
-       my $suite = $schema->resultset('Suite')->find_or_create({suite_name => $suite});
-       my $sv = $schema->resultset('SrcVer')->find_or_create({src_pkg =>$sp->id,
-                                                              ver => $pkg->{Version}});
-       my @addrs = getparsedaddrs($pkg->{Maintainer} // '');
-       if (@addrs) {
-           my $mc = $schema->resultset('Correspondent')->
-               find_or_create({addr => lc($addrs[0]->address())});
-           my $full_name = $addrs[0]->phrase();
-           $full_name =~ s/^\"|\"$//g;
-           $full_name =~ s/^\s+|\s+$//g;
-           $sv->discard_changes;
-           $sv->find_or_create_related('maintainer',
-                                      {name => $full_name,
+# Load a batch of packages for one suite and refresh its associations.
+#
+#  $schema - schema object; its resultset() method is used for all access
+#  $suite  - codename of the suite, found or created in the Suite table
+#  $pkgs   - arrayref of [$arch,$component,\%pkg] tuples; %pkg supplies
+#            Package and Version, plus Maintainer (read for source
+#            entries) and Source (read for binary entries)
+#  $p      - optional progress object; update() is called once per tuple
+#
+# After all tuples have been handled, associations of this suite whose
+# modified value is older than the newest one seen before loading started
+# are deleted.
+sub load_packages {
+    my ($schema,$suite,$pkgs,$p) = @_;
+    my $suite_id = $schema->resultset('Suite')->
+       find_or_create({codename => $suite})->id;
+    my %maint_cache;
+    my %arch_cache;
+    my %source_cache;
+    # remember the newest association of each kind before anything is
+    # touched; these are the cut-offs for the deletions at the end
+    my $src_max_last_modified = $schema->resultset('SrcAssociation')->
+       search_rs({suite => $suite_id},
+                {order_by => {-desc => ['me.modified']},
+                 rows => 1,
+                 page => 1
+                }
+                )->single();
+    my $bin_max_last_modified = $schema->resultset('BinAssociation')->
+       search_rs({suite => $suite_id},
+                {order_by => {-desc => ['me.modified']},
+                 rows => 1,
+                 page => 1
+                }
+                )->single();
+    print STDERR time." handling packages\n";
+    for my $pkg_tuple (@{$pkgs}) {
+       my ($arch,$component,$pkg) = @{$pkg_tuple};
+       $p->update() if $p;
+       if ($arch eq 'source') {
+           my $source = $pkg->{Package};
+           my $source_ver = $pkg->{Version};
+           # use '' when there is no Maintainer field so that an undefined
+           # value is never used as a hash key
+           my $maint_name = $pkg->{Maintainer} // '';
+           if (not exists $maint_cache{$maint_name}) {
+               my @addrs = getparsedaddrs($maint_name);
+               if (@addrs) {
+                   my $mc = $schema->resultset('Correspondent')->
+                       find_or_create({addr => lc($addrs[0]->address())},
+                                     {key => 'correspondent_addr_idx'}
+                                     );
+                   my $full_name = $addrs[0]->phrase();
+                   $full_name =~ s/^\"|\"$//g;
+                   $full_name =~ s/^\s+|\s+$//g;
+                   # $sv->discard_changes;
+                   my $maint = $schema->resultset('Maintainer')->
+                       find_or_create({name => $maint_name,
                                        correspondent => $mc->id},
-                                      );
-           $mc->update_or_create_related('correspondent_full_names',
-                                        {full_name=>$full_name,
-                                         last_seen => 'NOW()'});
+                                     {key => 'maintainer_name_idx'},
+                                     );
+                   $mc->find_or_create_related('correspondent_full_names',
+                                              {full_name => $full_name},
+                                              {key => 'correspondent_full_name_correspondent_full_name_idx'}
+                                              );
+                   $mc->update;
+                   $maint_cache{$maint_name} = $maint;
+               }
+           }
+           if (not exists $source_cache{$source}{$source_ver}) {
+               my $sp = $schema->resultset('SrcPkg')->
+                   find_or_create({pkg => $source});
+               my $sv = $sp->find_or_create_related('src_vers',
+                                                   {ver => $source_ver});
+               $source_cache{$source}{$source_ver} = $sv;
+               if (exists $maint_cache{$maint_name}) {
+                   $source_cache{$source}{$source_ver}->
+                       set_from_related('maintainer',
+                                        $maint_cache{$maint_name}
+                                       );
+                   $source_cache{$source}{$source_ver}->update;
+               }
+           }
+           $schema->resultset('SrcAssociation')->
+               update_or_create({suite => $suite_id,
+                                 source => $source_cache{$source}{$source_ver}->id,
+                                 modified => 'NOW()',
+                                },
+                               {key => 'src_associations_source_suite'}
+                               );
+       } else {
+           # look each architecture up only once per call
+           my $ar = $arch_cache{$arch} //= $schema->resultset('Arch')->
+               find_or_create(arch => $arch);
+           my $bp = $schema->resultset('BinPkg')->
+               find_or_create({pkg => $pkg->{Package}});
+           my $source = $pkg->{Source} // $pkg->{Package};
+           my $source_ver = $pkg->{Version};
+           # a Source field of the form "name (version)" overrides both the
+           # source name and the source version
+           if ($source =~ /^\s*(\S+) \(([^\)]+)\)\s*$/) {
+               ($source,$source_ver) = ($1,$2);
+           }
+           if (not exists $source_cache{$source}{$source_ver}) {
+               my $sp = $schema->resultset('SrcPkg')->
+                   find_or_create({pkg => $source});
+               my $sv = $sp->find_or_create_related('src_vers',
+                                                   {ver => $source_ver});
+               $source_cache{$source}{$source_ver} = $sv;
+           }
+           my $bv = $bp->find_or_create_related('bin_vers',
+                                               {ver => $pkg->{Version},
+                                                src_ver => $source_cache{$source}{$source_ver}->id,
+                                                arch => $ar->id,
+                                               });
+           $schema->resultset('BinAssociation')->
+               update_or_create({suite => $suite_id,
+                                 bin => $bv->id,
+                                 modified => 'NOW()',
+                                },
+                               {key => 'bin_associations_bin_suite'}
+                               );
        }
-       # update the link for this source package
-       $schema->
-           txndo(sub {
-                    # delete associations for this source package in this
-                    # suite
-                    $schema->resultset('SrcAssociations')->
-                        search_rs({suite => $suite->id,})->
-                        search_related_rs('src_pkg',
-                                         {src_pkg => $sp->id})->delete;
-                    $schema->resultset('SrcAssociations')->
-                        create({suite => $suite->id,
-                                source => $sv->id,
-                               });
-                });
-    } else {
-       my $bp = $schema->resultset('BinPkg')->find_or_create({pkg => $pkg->{Package}});
-       my $suite = $schema->resultset('Suite')->find_or_create({suite_name => $suite});
-       my ($bv) = $bp->search_related('bin_vers',{ver => $pkg->{Version}});
-       # if there isn't already a binary version for this package, we don't
-       # know what source it belongs to, so we can't associate it with a
-       # release
-       return if (not defined $bv);
-       $schema->
-           txndo(sub {
-                     $schema->resultset('BinAssociations')->
-                         search_rs({suite => $suite->id,})->
-                         search_related_rs('bin_pkg',
-                                          {bin_pkg_id => $bp->id}
-                                          )->delete;
-                     $schema->resultset('BinAssociations')->
-                         create({suite => $suite->id,
-                                 bin => $bv->id
-                                });
-                 });
     }
+    print STDERR time." deleting associations\n";
+    # delete old binary associations in this suite which have not recently been
+    # modified; search_rs() only builds the query, delete() is what removes
+    # the rows
+    # NOTE(review): the comparison is a strict '<', so rows sharing the
+    # previous newest modified value survive this run -- confirm intended
+    $schema->resultset('BinAssociation')->
+       search_rs({suite => $suite_id,
+                  modified => {'<',$bin_max_last_modified->modified()},
+                 })->delete if defined
+                     $bin_max_last_modified;
+    # likewise for old source associations
+    $schema->resultset('SrcAssociation')->
+       search_rs({suite => $suite_id,
+                  modified => {'<',$src_max_last_modified->modified()},
+                 })->delete if defined
+                     $src_max_last_modified;
 }
 
 =back
index ae267fe83393782d412e0709411358f130a7b0b3..e9b62ef9016ed7b9d5620d32c58fc00894d7e040 100755 (executable)
@@ -98,7 +98,8 @@ Display this manual.
 
 use vars qw($DEBUG);
 
-use Debbugs::Common qw(checkpid lockpid get_hashname getparsedaddrs getbugcomponent make_list getsourcemaintainers);
+use Debbugs::Common (qw(checkpid lockpid get_hashname getparsedaddrs getbugcomponent make_list getsourcemaintainers),
+                    qw(hash_slice));
 use Debbugs::Config qw(:config);
 use Debbugs::Status qw(read_bug split_status_fields);
 use Debbugs::Log;
@@ -106,8 +107,12 @@ use Debbugs::DB;
 use Debbugs::DB::Load qw(load_bug handle_load_bug_queue :load_package :load_suite);
 use DateTime;
 use File::stat;
+use File::Basename;
+use File::Spec;
 use IO::Dir;
+use IO::File;
 use IO::Uncompress::AnyUncompress;
+use Encode qw(decode_utf8);
 
 my %options =
     (debug           => 0,
@@ -157,6 +162,7 @@ my %subcommands =
                },
      'packages' => {function => \&add_packages,
                    arguments => {'ftpdists=s' => 1,
+                                 'suites=s@' => 0,
                                 },
                   },
      'help' => {function => sub {pod2usage({verbose => 2});}}
@@ -202,6 +208,9 @@ if (not defined $subcommand) {
     pod2usage();
 }
 
+binmode(STDOUT,':encoding(UTF-8)');
+binmode(STDERR,':encoding(UTF-8)');
+
 my $opts =
     handle_subcommand_arguments(\@ARGV,$subcommands{$subcommand}{arguments});
 $subcommands{$subcommand}{function}->(\%options,$opts,$prog_bar,\%config,\@ARGV);
@@ -507,8 +516,6 @@ sub add_logs {
 sub add_packages {
     my ($options,$opts,$p,$config,$argv) = @_;
 
-    my $s = db_connect($options);
-
     my $dist_dir = IO::Dir->new($opts->{ftpdists});
     my @dist_names =
        grep { $_ !~ /^\./ and
@@ -516,69 +523,67 @@ sub add_packages {
               not -l $opts->{ftpdists}.'/'.$_
           } $dist_dir->read;
     my %s_p;
-    my %s_info;
     while (my $dist = shift @dist_names) {
        my $dist_dir = $opts->{ftpdists}.'/'.$dist;
-       # parse release
-       my $rfh =  IO::Uncompress::AnyUncompress->new($dist_dir.'/Release');
-       my %dist_info;
-       my $in_sha1;
-       my %p_f;
-       while (<$rfh>) {
-           chomp;
-           if (s/^(\S+):\s*//) {
-               if ($1 eq 'SHA1'or $1 eq 'SHA256') {
-                   $in_sha1 = 1;
-                   next;
-               }
-               $dist_info{$1} = $_;
-           } elsif ($in_sha1) {
-               s/^\s//;
-               my ($sha,$size,$file) = split /\s+/,$_;
-               next unless $file =~ /(?:Packages|Sources)(?:\.gz|\.xz)$/;
-               next unless $file =~ m{^([^/]+)/([^/]+)/([^/]+)$};
-               my ($component,$arch,$package_source) = ($1,$2,$3);
-               $arch =~ s/binary-//;
-               next if exists $p_f{$component}{$arch};
-               $p_f{$component}{$arch} = $dist_dir.'/'.$file;
-           }
+       my ($dist_info,$package_files) =
+           read_release_file($dist_dir.'/Release');
+       $s_p{$dist_info->{Codename}} = $package_files;
+    }
+    my $tot = 0;
+    for my $suite (keys %s_p) {
+       for my $component (keys %{$s_p{$suite}}) {
+           $tot += scalar keys %{$s_p{$suite}{$component}};
        }
-       $s_p{$dist_info{Suite}} = \%p_f;
-       $s_info{$dist_info{Suite}} = \%s_info;
     }
+    $p->target($tot) if $p;
+    my $i = 0;
+    my $avg_pkgs = 0;
+    my $tot_suites = scalar keys %s_p;
+    my $done_suites=0;
+    my $completed_pkgs=0;
     # parse packages files
     for my $suite (keys %s_p) {
+       print STDERR "working on $suite\n";
+       my @pkgs;
        for my $component (keys %{$s_p{$suite}}) {
-           for my $arch (keys %{$s_p{$suite}{$component}}) {
-               my $pfh =  IO::Uncompress::AnyUncompress->new($s_p{$suite}{$component}{$arch}) or
+           my @archs = keys %{$s_p{$suite}{$component}};
+           if (grep {$_ eq 'source'} @archs) {
+               @archs = ('source',grep {$_ ne 'source'} @archs);
+           }
+           for my $arch (@archs) {
+               my $pfh =  open_compressed_file($s_p{$suite}{$component}{$arch}) or
                    die "Unable to open $s_p{$suite}{$component}{$arch} for reading: $!";
-               my $lastkey;
-               my %pkg;
+               local $_;
+               local $/ = '';  # paragraph mode
                while (<$pfh>) {
-                   if (/^$/) {
-                       load_package($s,$suite,$component,$arch,\%pkg);
-                       %pkg = ();
-                       next;
-                   }
-                   if (my ($key, $value) = m/^(\S+): (.*)/) {
-                       $pkg{$key} = $value;
-                       $lastkey=$key;
-                   }
-                   else {
-                       s/ //;
-                       s/^\.$//;
-                       chomp;
-                       $pkg{$lastkey} .= "\n" . $_;
+                   my %pkg;
+                   for my $field (qw(Package Maintainer Version Source)) {
+                       /^\Q$field\E: (.*)/m;
+                       $pkg{$field} = $1;
                    }
-               }
-               if (keys %pkg) {
-                   load_package($s,$suite,$component,$arch,\%pkg);
+                   next unless defined $pkg{Package} and
+                       defined $pkg{Version};
+                   push @pkgs,[$arch,$component,\%pkg];
                }
            }
        }
+       my $s = db_connect($options);
+       if ($avg_pkgs==0) {
+           $avg_pkgs = @pkgs;
+       }
+        $p->target($avg_pkgs*($tot_suites-$done_suites-1)+
+                  $completed_pkgs+@pkgs) if $p;
+       $s->txn_do(sub {
+                      Debbugs::DB::Load::load_packages($s,
+                                                       $suite,
+                                                       \@pkgs,
+                                                       $p)
+                      });
+       $avg_pkgs=($avg_pkgs*$done_suites + @pkgs)/($done_suites+1);
+       $completed_pkgs += @pkgs;
+       $done_suites++;
     }
-    use Data::Printer;
-    p %s_p;
+    $p->remove() if $p;
 }
 
 sub handle_subcommand_arguments {
@@ -622,6 +627,57 @@ sub db_connect {
         die "Unable to connect to database: ";
 }
 
+# Open $file for reading with a UTF-8 decoding layer. Names ending in
+# .gz, .xz or .bz2 are read through "gzip -dc", "xz -dc" or "bzip2 -dc"
+# respectively (list-form pipe open, so no shell is involved); any other
+# name is opened directly.
+#
+# Returns the filehandle, or nothing (false) if open() fails, in which
+# case $! is left set for the caller's error message.
+sub open_compressed_file {
+    my ($file) = @_;
+    my $fh;
+    my $mode = '<:encoding(UTF-8)';
+    my @opts;
+    if ($file =~ /\.gz$/) {
+       $mode = '-|:encoding(UTF-8)';
+       push @opts,'gzip','-dc';
+    } elsif ($file =~ /\.xz$/) {
+       $mode = '-|:encoding(UTF-8)';
+       push @opts,'xz','-dc';
+    } elsif ($file =~ /\.bz2$/) {
+       $mode = '-|:encoding(UTF-8)';
+       push @opts,'bzip2','-dc';
+    }
+    # $fh is autovivified even when open() fails, so returning it
+    # unconditionally would make every caller's "or die" check useless
+    open($fh,$mode,@opts,$file) or return;
+    return $fh;
+}
+
+# Parse the Release file $file and return (\%dist_info,\%p_f).
+#
+# %dist_info maps every "Field: value" header to its value, except the
+# SHA1 and SHA256 headers which only mark the start of a checksum section.
+# %p_f maps component => arch => path for the first Packages/Sources index
+# ending in .gz or .xz that is listed for that pair, where arch is the
+# second path element with a leading "binary-" removed. Each path is the
+# listed name joined onto the directory containing $file.
+#
+# Dies if $file cannot be opened.
+sub read_release_file {
+    my ($file) = @_;
+    # parse release
+    my $rfh =  open_compressed_file($file) or
+       die "Unable to open $file for reading: $!";
+    my $release_dir = dirname($file);
+    my %dist_info;
+    my $in_checksums;
+    my %p_f;
+    # read into a lexical so that the caller's $_ is left untouched
+    while (my $line = <$rfh>) {
+       chomp $line;
+       if ($line =~ s/^(\S+):\s*//) {
+           my $field = $1;
+           if ($field eq 'SHA1' or $field eq 'SHA256') {
+               $in_checksums = 1;
+               next;
+           }
+           $dist_info{$field} = $line;
+       } elsif ($in_checksums) {
+           $line =~ s/^\s//;
+           # entries are "<checksum> <size> <path>"; only the path is used
+           my (undef,undef,$f) = split /\s+/,$line;
+           # skip malformed entries which carry no path
+           next unless defined $f;
+           next unless $f =~ /(?:Packages|Sources)(?:\.gz|\.xz)$/;
+           next unless $f =~ m{^([^/]+)/([^/]+)/[^/]+$};
+           my ($component,$arch) = ($1,$2);
+           # only strip "binary-" when it is a prefix of the directory name
+           $arch =~ s/^binary-//;
+           # the first index listed for a component/arch pair wins
+           next if exists $p_f{$component}{$arch};
+           $p_f{$component}{$arch} = File::Spec->catfile($release_dir,$f);
+       }
+    }
+    return (\%dist_info,\%p_f);
+}
+
 sub walk_bugs {
     my ($dirs,$p,$what,$verbose,$sub) = @_;
     my @dirs = @{$dirs};