X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bin%2Fdebbugs-loadsql;h=9597447aaf0997c8e01efbd0ee2848fbce462d41;hb=781dc48724abfa2c0cc554f4ba34d8e6c3765f99;hp=f25e53c1296fb8d2325eb66041076b9672447acc;hpb=eb99bedd4390c9f7f2dc974639e78aa42816624f;p=debbugs.git diff --git a/bin/debbugs-loadsql b/bin/debbugs-loadsql index f25e53c..9597447 100755 --- a/bin/debbugs-loadsql +++ b/bin/debbugs-loadsql @@ -17,10 +17,13 @@ debbugs-loadsql -- load debbugs sql database =head1 SYNOPSIS -debbugs-loadsql [options] +debbugs-loadsql [options] [subcommand] + Subcommands: + bugs help versions configuration + suites logs packages debinfo Options: - --quick, -q only load changed bugs + --quick, -q only load changed things --progress Show progress bar --service, -s service name --sysconfdir, -c postgresql service config dir @@ -37,15 +40,45 @@ Display this manual =head2 bugs -Add bugs +Add bugs (subject, number, etc) to the database + + --preload create all bugs first, then add information =head2 versions -Add versions +Add version descendant information (which version is based on which version) to +the database =head2 maintainers -Add source maintainers +Add source maintainers to the BTS + +=head2 configuration + +Add debbugs configuration information (tags, severity, etc) + +=head2 suites + +Add suite information from ftp distribution + + --ftpdists location of FTP mirror + +=head2 logs + +Add bug logs + +=head2 packages + +Add package information from the ftp archive + + --ftpdists location of FTP mirror + --suites Suite to operate on + +=head2 debinfo + +Add package information from a debinfo file + + --null -0 names of debinfo files are null separated =head1 OPTIONS @@ -98,15 +131,22 @@ Display this manual. use vars qw($DEBUG); -use Debbugs::Common qw(checkpid lockpid get_hashname getparsedaddrs getbugcomponent make_list getsourcemaintainers); +use Debbugs::Common (qw(checkpid lockpid get_hashname getparsedaddrs getbugcomponent make_list getsourcemaintainers), + qw(hash_slice open_compressed_file),); use Debbugs::Config qw(:config); use Debbugs::Status qw(read_bug split_status_fields); use Debbugs::Log; use Debbugs::DB; -use Debbugs::DB::Load qw(load_bug handle_load_bug_queue); +use Debbugs::DB::Load qw(:load_bug :load_package :load_suite); use DateTime; use File::stat; - +use File::Basename; +use File::Spec; +use IO::Dir; +use IO::File; +use IO::Uncompress::AnyUncompress; +use Encode qw(decode_utf8); +use List::MoreUtils qw(natatime); my %options = (debug => 0, @@ -138,19 +178,28 @@ $DEBUG = $options{debug}; my %subcommands = ('bugs' => {function => \&add_bugs, + arguments => {'preload' => 0}, }, 'versions' => {function => \&add_versions, }, 'debinfo' => {function => \&add_debinfo, + arguments => {'0|null' => 0}, }, 'maintainers' => {function => \&add_maintainers, }, 'configuration' => {function => \&add_configuration, }, - 'suites' => {function => \&add_suites, + 'suites' => {function => \&add_suite, + arguments => {'ftpdists=s' => 1, + }, }, 'logs' => {function => \&add_logs, }, + 'packages' => {function => \&add_packages, + arguments => {'ftpdists=s' => 1, + 'suites=s@' => 0, + }, + }, 'help' => {function => sub {pod2usage({verbose => 2});}} ); @@ -194,6 +243,9 @@ if (not defined $subcommand) { pod2usage(); } +binmode(STDOUT,':encoding(UTF-8)'); +binmode(STDERR,':encoding(UTF-8)'); + my $opts = handle_subcommand_arguments(\@ARGV,$subcommands{$subcommand}{arguments}); $subcommands{$subcommand}{function}->(\%options,$opts,$prog_bar,\%config,\@ARGV); @@ -219,36 +271,56 @@ sub add_bugs { my %severities; my %queue; + if ($opts->{preload}) { + my @bugs; + walk_bugs([(@{$argv}?@{$argv} : $initialdir)], + undef, + 'summary', + undef, + sub { + push @bugs,@_; + }, + 10000 + ); + $s->resultset('Bug')->quick_insert_bugs(@bugs); + } walk_bugs([(@{$argv}?@{$argv} : $initialdir)], $p, 'summary', $verbose, sub { - my $bug = shift; - my $stat = stat(getbugcomponent($bug,'summary',$initialdir)); - if (not defined $stat) { + my @bugs = @_; + my @bugs_to_update; + if ($options{quick}) { + for my $bug (@bugs) { + my $stat = stat(getbugcomponent($bug,'summary',$initialdir)); + if (not defined $stat) { print STDERR "Unable to stat $bug $!\n"; next; + } + my $rs = $s->resultset('Bug')->search({id=>$bug})->single(); + next if defined $rs and $stat->mtime <= $rs->last_modified()->epoch(); + push @bugs_to_update, $bug; } - if ($options{quick}) { - my $rs = $s->resultset('Bug')->search({bug=>$bug})->single(); - next if defined $rs and $stat->mtime < $rs->last_modified()->epoch(); - } - my $data = read_bug(bug => $bug, - location => $initialdir); - eval { - load_bug(db => $s, - data => split_status_fields($data), - tags => \%tags, - severities => \%severities, - queue => \%queue); - }; - if ($@) { - use Data::Dumper; - print STDERR Dumper($data) if $DEBUG; - die "failure while trying to load bug $bug\n$@"; - } - } + } else { + @bugs_to_update = @bugs; + } + eval { + $s->txn_do(sub { + for my $bug (@bugs_to_update) { + load_bug(db => $s, + bug => $bug, + tags => \%tags, + severities => \%severities, + queue => \%queue); + } + }); + }; + if ($@) { + die "failure while trying to load bug: $@"; + } + }, + 50 ); handle_load_bug_queue(db => $s, queue => \%queue); @@ -279,19 +351,21 @@ sub add_versions { my $sp; if (not defined $src_pkgs{$versions[$i][0]}) { $src_pkgs{$versions[$i][0]} = - $s->resultset('SrcPkg')->find_or_create({pkg => $versions[$i][0]}); + $s->resultset('SrcPkg')-> + get_src_pkg_id($versions[$i][0]); } $sp = $src_pkgs{$versions[$i][0]}; # There's probably something wrong if the source package # doesn't exist, but we'll skip it for now - next unless defined $sp; - my $sv = $s->resultset('SrcVer')->find({src_pkg=>$sp->id(), + last if not defined $sp; + my $sv = $s->resultset('SrcVer')->find({src_pkg=>$sp, ver => $versions[$i][1], }); + last if not defined $sv; if (defined $ancestor_sv and defined $sv and not defined $sv->based_on()) { - $sv->update({based_on => $ancestor_sv->id()}) + $sv->update({based_on => $ancestor_sv}) } - $ancestor_sv = $sv; + $ancestor_sv = $sv->id(); } $p->update() if $p; } @@ -302,44 +376,56 @@ sub add_debinfo { my ($options,$opts,$p,$config,$argv) = @_; my @files = @{$argv}; + if (not @files) { + { + local $/ = "\n"; + local $/ = "\0" if $opts->{0}; + while () { + s/\n$// unless $opts->{0}; + s/\0$// if $opts->{0}; + push @files, $_; + } + } + } return unless @files; my $s = db_connect($options); - my %arch; $p->target(scalar @files) if $p; - for my $file (@files) { - my $fh = IO::File->new($file,'r') or - die "Unable to open $file for reading: $!"; - my $f_stat = stat($file); - while (<$fh>) { - chomp; - next unless length $_; - my ($binname, $binver, $binarch, $srcname, $srcver) = split; - # if $srcver is not defined, this is probably a broken - # .debinfo file [they were causing #686106, see commit - # 49c85ab8 in dak.] Basically, $binarch didn't get put into - # the file, so we'll fudge it from the filename. - if (not defined $srcver) { - ($srcname,$srcver) = ($binarch,$srcname); - ($binarch) = $file =~ /_([^\.]+)\.debinfo/; - } - my $sp = $s->resultset('SrcPkg')->find_or_create({pkg => $srcname}); - my $sv = $s->resultset('SrcVer')->find_or_create({src_pkg =>$sp->id(), - ver => $srcver}); - my $arch; - if (defined $arch{$binarch}) { - $arch = $arch{$binarch}; - } else { - $arch = $s->resultset('Arch')->find_or_create({arch => $binarch}); - $arch{$binarch} = $arch; - } - my $bp = $s->resultset('BinPkg')->find_or_create({pkg => $binname}); - $s->resultset('BinVer')->find_or_create({bin_pkg => $bp->id(), - src_ver => $sv->id(), - arch => $arch->id(), - ver => $binver, - }); - } - $p->update() if $p; + my $it = natatime 100, @files; + while (my @v = $it->()) { + my %cache; + my @debinfos; + for my $file (@v) { + my $fh = IO::File->new($file,'r') or + die "Unable to open $file for reading: $!"; + my $f_stat = stat($file); + my $ct_date = DateTime->from_epoch(epoch => $f_stat->ctime); + while (<$fh>) { + chomp; + next unless length $_; + my ($binname, $binver, $binarch, $srcname, $srcver) = split; + # if $srcver is not defined, this is probably a broken + # .debinfo file [they were causing #686106, see commit + # 49c85ab8 in dak.] Basically, $binarch didn't get put into + # the file, so we'll fudge it from the filename. + if (not defined $srcver) { + ($srcname,$srcver) = ($binarch,$srcname); + ($binarch) = $file =~ /_([^\.]+)\.debinfo/; + } + if (not defined $srcver) { + print STDERR "malformed debinfo (no srcver): $file\n"; + next; + } + push @debinfos, + [$binname,$binver,$binarch,$srcname,$srcver,$ct_date]; + } + } + $s->txn_do( + sub { + for my $di (@debinfos) { + Debbugs::DB::Load::load_debinfo($s,@{$di}[0..5],\%cache); + } + }); + $p->update($p->last_update()+@v) if $p; } $p->remove() if $p; } @@ -349,42 +435,36 @@ sub add_maintainers { my $s = db_connect($options); my $maintainers = getsourcemaintainers(); - $p->target(scalar keys %{$maintainers}) if $p; - for my $pkg (keys %{$maintainers}) { - my $maint = $maintainers->{$pkg}; - # see if a maintainer already exists; if so, we don't do - # anything here - my $maint_r = $s->resultset('Maintainer')-> - find({name => $maint}); - if (not defined $maint_r) { - # get e-mail address of maintainer - my $addr = getparsedaddrs($maint); - my $e_mail = $addr->address(); - my $full_name = $addr->phrase(); - $full_name =~ s/^\"|\"$//g; - $full_name =~ s/^\s+|\s+$//g; - # find correspondent - my $correspondent = $s->resultset('Correspondent')-> - find_or_create({addr => $e_mail}); - if (length $full_name) { - my $c_full_name = $correspondent->find_or_create_related('correspondent_full_names', - {full_name => $full_name}) if length $full_name; - $c_full_name->update({last_seen => 'NOW()'}); - } - $maint_r = - $s->resultset('Maintainer')-> - find_or_create({name => $maint, - correspondent => $correspondent, - }); - } - # add the maintainer to the source package for packages with - # no maintainer - $s->txn_do(sub { - $s->resultset('SrcPkg')->search({pkg => $pkg})-> - search_related_rs('src_vers',{ maintainer => undef})-> - update_all({maintainer => $maint_r->id()}); - }); - $p->update() if $p; + $p->target(2) if $p; + ## get all of the maintainers, and add the missing ones + my $maints = $s->resultset('Maintainer')-> + get_maintainers(values %{$maintainers}); + $p->update(); + my @svs = $s->resultset('SrcVer')-> + search({maintainer => undef + }, + {join => 'src_pkg', + group_by => 'me.src_pkg, src_pkg.pkg', + result_class => 'DBIx::Class::ResultClass::HashRefInflator', + columns => [qw(me.src_pkg src_pkg.pkg)], + } + )->all(); + $p->target(2+@svs) if $p; + $p->update() if $p; + for my $sv (@svs) { + if (exists $maintainers->{$sv->{src_pkg}{pkg}}) { + my $pkg = $sv->{src_pkg}{pkg}; + my $maint = $maints-> + {$maintainers->{$pkg}}; + $s->txn_do(sub {$s->resultset('SrcVer')-> + search({maintainer => undef, + 'src_pkg.pkg' => $pkg + }, + {join => 'src_pkg'} + )->update({maintainer => $maint}) + }); + } + $p->update() if $p; } $p->remove() if $p; } @@ -396,25 +476,35 @@ sub add_configuration { # tags # add all tags + my %tags; + for my $tag (@{$config{tags}}) { + $tags{$tag} = 1; + $s->resultset('Tag')->find_or_create({tag => $tag}); + } # mark obsolete tags + for my $tag ($s->resultset('Tag')->search_rs()->all()) { + next if exists $tags{$tag->tag}; + $tag->obsolete(1); + $tag->update; + } # severities my %sev_names; - my $order = 0; - for my $sev_name (@{$config{severities}}) { + my $order = -1; + for my $sev_name (($config{default_severity},@{$config{severity_list}})) { # add all severitites my $sev = $s->resultset('Severity')->find_or_create({severity => $sev_name}); # mark strong severities - if (grep $sev_name $config{strong_severities}) { + if (grep {$_ eq $sev_name} @{$config{strong_severities}}) { $sev->strong(1); } - $sev->order($order); + $sev->ordering($order); $sev->update(); $order++; $sev_names{$sev_name} = 1; } # mark obsolete severities - for my $sev ($s->resultset('Severity')->find()) { + for my $sev ($s->resultset('Severity')->search_rs()->all()) { next if exists $sev_names{$sev->severity()}; $sev->obsolete(1); $sev->update(); @@ -424,7 +514,20 @@ sub add_configuration { sub add_suite { my ($options,$opts,$p,$config,$argv) = @_; # suites - die "add_suite is currently not implemented; modify suites manually using SQL." + + my $s = db_connect($options); + my $dist_dir = IO::Dir->new($opts->{ftpdists}); + my @dist_names = + grep { $_ !~ /^\./ and + -d $opts->{ftpdists}.'/'.$_ and + not -l $opts->{ftpdists}.'/'.$_ + } $dist_dir->read; + while (my $dist = shift @dist_names) { + my $dist_dir = $opts->{ftpdists}.'/'.$dist; + my ($dist_info,$package_files) = + read_release_file($dist_dir.'/Release'); + load_suite($s,$dist_info); + } } sub add_logs { @@ -452,7 +555,18 @@ sub add_logs { $verbose, sub { my $bug = shift; - eval { + my $stat = stat(getbugcomponent($bug,'log',$initialdir)); + if (not defined $stat) { + print STDERR "Unable to stat $bug $!\n"; + next; + } + if ($options{quick}) { + my $rs = $s->resultset('Bug')-> + search({id=>$bug})->single(); + next if defined $rs and + $stat->mtime <= $rs->last_modified()->epoch(); + } + eval { load_bug_log(db => $s, bug => $bug); }; @@ -463,7 +577,73 @@ sub add_logs { } sub add_packages { + my ($options,$opts,$p,$config,$argv) = @_; + my $dist_dir = IO::Dir->new($opts->{ftpdists}); + my @dist_names = + grep { $_ !~ /^\./ and + -d $opts->{ftpdists}.'/'.$_ and + not -l $opts->{ftpdists}.'/'.$_ + } $dist_dir->read; + my %s_p; + while (my $dist = shift @dist_names) { + my $dist_dir = $opts->{ftpdists}.'/'.$dist; + my ($dist_info,$package_files) = + read_release_file($dist_dir.'/Release'); + $s_p{$dist_info->{Codename}} = $package_files; + } + my $tot = 0; + for my $suite (keys %s_p) { + for my $component (keys %{$s_p{$suite}}) { + $tot += scalar keys %{$s_p{$suite}{$component}}; + } + } + $p->target($tot) if $p; + my $i = 0; + my $avg_pkgs = 0; + my $tot_suites = scalar keys %s_p; + my $done_suites=0; + my $completed_pkgs=0; + # parse packages files + for my $suite (keys %s_p) { + my @pkgs; + for my $component (keys %{$s_p{$suite}}) { + my @archs = keys %{$s_p{$suite}{$component}}; + if (grep {$_ eq 'source'} @archs) { + @archs = ('source',grep {$_ ne 'source'} @archs); + } + for my $arch (@archs) { + my $pfh = open_compressed_file($s_p{$suite}{$component}{$arch}) or + die "Unable to open $s_p{$suite}{$component}{$arch} for reading: $!"; + local $_; + local $/ = ''; # paragraph mode + while (<$pfh>) { + my %pkg; + for my $field (qw(Package Maintainer Version Source)) { + /^\Q$field\E: (.*)/m; + $pkg{$field} = $1; + } + next unless defined $pkg{Package} and + defined $pkg{Version}; + push @pkgs,[$arch,$component,\%pkg]; + } + } + } + my $s = db_connect($options); + if ($avg_pkgs==0) { + $avg_pkgs = @pkgs; + } + $p->target($avg_pkgs*($tot_suites-$done_suites-1)+ + $completed_pkgs+@pkgs) if $p; + load_packages($s, + $suite, + \@pkgs, + $p); + $avg_pkgs=($avg_pkgs*$done_suites + @pkgs)/($done_suites+1); + $completed_pkgs += @pkgs; + $done_suites++; + } + $p->remove() if $p; } sub handle_subcommand_arguments { @@ -503,17 +683,48 @@ sub db_connect { my ($options) = @_; # connect to the database; figure out how to handle errors # properly here. - my $s = Debbugs::DB->connect('dbi:Pg:service='.$options->{service}) or + my $s = Debbugs::DB->connect($options->{service}) or die "Unable to connect to database: "; } +sub read_release_file { + my ($file) = @_; + # parse release + my $rfh = open_compressed_file($file) or + die "Unable to open $file for reading: $!"; + my %dist_info; + my $in_sha1; + my %p_f; + while (<$rfh>) { + chomp; + if (s/^(\S+):\s*//) { + if ($1 eq 'SHA1'or $1 eq 'SHA256') { + $in_sha1 = 1; + next; + } + $dist_info{$1} = $_; + } elsif ($in_sha1) { + s/^\s//; + my ($sha,$size,$f) = split /\s+/,$_; + next unless $f =~ /(?:Packages|Sources)(?:\.gz|\.xz)$/; + next unless $f =~ m{^([^/]+)/([^/]+)/([^/]+)$}; + my ($component,$arch,$package_source) = ($1,$2,$3); + $arch =~ s/binary-//; + next if exists $p_f{$component}{$arch}; + $p_f{$component}{$arch} = File::Spec->catfile(dirname($file),$f); + } + } + return (\%dist_info,\%p_f); +} + sub walk_bugs { - my ($dirs,$p,$what,$verbose,$sub) = @_; + my ($dirs,$p,$what,$verbose,$sub,$n) = @_; my @dirs = @{$dirs}; my $tot_dirs = @dirs; my $done_dirs = 0; my $avg_subfiles = 0; my $completed_files = 0; + $n //= 1; while (my $dir = shift @dirs) { printf "Doing dir %s ...\n", $dir if $verbose; @@ -533,11 +744,13 @@ sub walk_bugs { $avg_subfiles = ($avg_subfiles * $done_dirs + @list) / ($done_dirs+1); $done_dirs += 1; - for my $bug (@list) { - $completed_files++; - $p->update($completed_files) if $p; - print "Up to $completed_files bugs...\n" if ($completed_files % 100 == 0 && $verbose); - $sub->($bug); + my $it = natatime $n,@list; + while (my @bugs = $it->()) { + $sub->(@bugs); + $completed_files += scalar @bugs; + $p->update($completed_files) if $p; + print "Up to $completed_files bugs...\n" + if ($completed_files % 100 == 0 && $verbose); } } $p->remove() if $p;