X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bin%2Fdebbugs-loadsql;h=fe9ce6b0177200f93940143fa34c52eb8f4b4523;hb=1fbaca61f2f551b5b2645ce2233a1d964bc93b30;hp=59a8502d4bc14dc6d2cb261b66cbe43b78b24bcc;hpb=6ba69a1c420be91ba3489e678c2a99bab08975fa;p=debbugs.git diff --git a/bin/debbugs-loadsql b/bin/debbugs-loadsql index 59a8502..fe9ce6b 100755 --- a/bin/debbugs-loadsql +++ b/bin/debbugs-loadsql @@ -17,10 +17,13 @@ debbugs-loadsql -- load debbugs sql database =head1 SYNOPSIS -debbugs-loadsql [options] +debbugs-loadsql [options] [subcommand] + Subcommands: + bugs help versions configuration + suites logs packages debinfo Options: - --quick, -q only load changed bugs + --quick, -q only load changed things --progress Show progress bar --service, -s service name --sysconfdir, -c postgresql service config dir @@ -37,15 +40,45 @@ Display this manual =head2 bugs -Add bugs +Add bugs (subject, number, etc) to the database + + --preload create all bugs first, then add information =head2 versions -Add versions +Add version descendant information (which version is based on which version) to +the database =head2 maintainers -Add source maintainers +Add source maintainers to the BTS + +=head2 configuration + +Add debbugs configuration information (tags, severity, etc) + +=head2 suites + +Add suite information from ftp distribution + + --ftpdists location of FTP mirror + +=head2 logs + +Add bug logs + +=head2 packages + +Add package information from the ftp archive + + --ftpdists location of FTP mirror + --suites Suite to operate on + +=head2 debinfo + +Add package information from a debinfo file + + --null -0 names of debinfo files are null separated =head1 OPTIONS @@ -98,8 +131,18 @@ Display this manual. use vars qw($DEBUG); -use Debbugs::Common (qw(checkpid lockpid get_hashname getparsedaddrs getbugcomponent make_list getsourcemaintainers), - qw(hash_slice)); +# if we're running out of git, we want to use the git base directory as the +# first INC directory. If you're not running out of git, or someone has given a +# non-absolute INC, don't do that. +use FindBin; +use if (-d $FindBin::Bin.'/../.git/' && $INC[0] =~ m#^/#), + lib => $FindBin::Bin.'/../'; + +use Debbugs::Common (qw(checkpid lockpid get_hashname getparsedaddrs), + qw(getbugcomponent make_list getsourcemaintainers), + qw(getbuglocation), + qw(walk_bugs), + qw(hash_slice open_compressed_file),); use Debbugs::Config qw(:config); use Debbugs::Status qw(read_bug split_status_fields); use Debbugs::Log; @@ -109,10 +152,12 @@ use DateTime; use File::stat; use File::Basename; use File::Spec; +use File::Find; use IO::Dir; use IO::File; use IO::Uncompress::AnyUncompress; use Encode qw(decode_utf8); +use List::AllUtils qw(natatime); my %options = (debug => 0, @@ -129,6 +174,7 @@ Getopt::Long::Configure('pass_through'); GetOptions(\%options, 'quick|q', 'service|s=s', + 'dsn=s', 'sysconfdir|c=s', 'progress!', 'spool_dir|spool-dir=s', @@ -144,12 +190,16 @@ $DEBUG = $options{debug}; my %subcommands = ('bugs' => {function => \&add_bugs, - arguments => {'preload' => 0}, + arguments => {'preload' => 0, + 'bugs=s@' => 0, + }, }, 'versions' => {function => \&add_versions, }, 'debinfo' => {function => \&add_debinfo, - arguments => {'0|null' => 0}, + arguments => {'0|null' => 0, + 'debinfo_dir|debinfo-dir=s' => 0, + }, }, 'maintainers' => {function => \&add_maintainers, }, @@ -161,6 +211,8 @@ my %subcommands = }, 'logs' => {function => \&add_logs, }, + 'bugs_and_logs' => {function => \&add_bugs_and_logs, + }, 'packages' => {function => \&add_packages, arguments => {'ftpdists=s' => 1, 'suites=s@' => 0, @@ -223,61 +275,62 @@ sub add_bugs { my $verbose = $options->{debug}; - my $initialdir = "db-h"; - - if (defined $argv->[0] and $argv->[0] eq "archive") { - $initialdir = "archive"; + my @dirs; + if (@{$argv}) { + @dirs = @{$argv}; + } elsif (not defined $opts->{bugs}) { + @dirs = "db-h" } + $opts->{bugs} //= []; + my $s = db_connect($options); - my $time = 0; - my $start_time = time; my %tags; my %severities; my %queue; if ($opts->{preload}) { my @bugs; - walk_bugs([(@{$argv}?@{$argv} : $initialdir)], - undef, - 'summary', - undef, - sub { - push @bugs,shift; - }); + walk_bugs(dirs => [@dirs], + bugs => $opts->{bugs}, + callback => sub { + push @bugs,@_; + }, + bugs_per_call => 10000 + ); $s->resultset('Bug')->quick_insert_bugs(@bugs); } - walk_bugs([(@{$argv}?@{$argv} : $initialdir)], - $p, - 'summary', - $verbose, - sub { - my $bug = shift; - my $stat = stat(getbugcomponent($bug,'summary',$initialdir)); - if (not defined $stat) { - print STDERR "Unable to stat $bug $!\n"; - next; - } - if ($options{quick}) { - my $rs = $s->resultset('Bug')->search({bug=>$bug})->single(); - next if defined $rs and $stat->mtime < $rs->last_modified()->epoch(); - } - my $data = read_bug(bug => $bug, - location => $initialdir); - eval { - load_bug(db => $s, - data => split_status_fields($data), - tags => \%tags, - severities => \%severities, - queue => \%queue); - }; - if ($@) { - use Data::Dumper; - print STDERR Dumper($data) if $DEBUG; - die "failure while trying to load bug $bug\n$@"; - } - } + walk_bugs(dirs => [@dirs], + bugs => $opts->{bugs}, + progress_bar => $p, + $verbose?(logging=>\*STDERR):(), + callback => + sub { + my @bugs = @_; + my @bugs_to_update; + if ($options{quick}) { + @bugs_to_update = + bugs_to_update($s,@bugs); + } else { + @bugs_to_update = @bugs; + } + eval { + $s->txn_do(sub { + for my $bug (@bugs_to_update) { + load_bug(db => $s, + bug => $bug, + tags => \%tags, + severities => \%severities, + queue => \%queue); + } + }); + }; + if ($@) { + die "failure while trying to load bug: $@"; + } + }, + bugs_per_call => 50 ); handle_load_bug_queue(db => $s, queue => \%queue); @@ -308,19 +361,21 @@ sub add_versions { my $sp; if (not defined $src_pkgs{$versions[$i][0]}) { $src_pkgs{$versions[$i][0]} = - $s->resultset('SrcPkg')->find_or_create({pkg => $versions[$i][0]}); + $s->resultset('SrcPkg')-> + get_or_create_src_pkg_id($versions[$i][0]); } $sp = $src_pkgs{$versions[$i][0]}; # There's probably something wrong if the source package # doesn't exist, but we'll skip it for now - next unless defined $sp; - my $sv = $s->resultset('SrcVer')->find({src_pkg=>$sp->id(), + last if not defined $sp; + my $sv = $s->resultset('SrcVer')->find({src_pkg=>$sp, ver => $versions[$i][1], }); + last if not defined $sv; if (defined $ancestor_sv and defined $sv and not defined $sv->based_on()) { - $sv->update({based_on => $ancestor_sv->id()}) + $sv->update({based_on => $ancestor_sv}) } - $ancestor_sv = $sv; + $ancestor_sv = $sv->id(); } $p->update() if $p; } @@ -331,65 +386,80 @@ sub add_debinfo { my ($options,$opts,$p,$config,$argv) = @_; my @files = @{$argv}; + if (exists $opts->{debinfo_dir} and not @files) { + find(sub { + if (-f $_ and /\.debinfo$/) { + push @files, $File::Find::name; + } + }, + $opts->{debinfo_dir} + ); + } if (not @files) { { - if ($opts->{0}) { - local $/ = "\0"; - } + local $/ = "\n"; + local $/ = "\0" if $opts->{0}; while () { + s/\n$// unless $opts->{0}; + s/\0$// if $opts->{0}; push @files, $_; } } } return unless @files; my $s = db_connect($options); - my %arch; $p->target(scalar @files) if $p; - for my $file (@files) { - my $fh = IO::File->new($file,'r') or - die "Unable to open $file for reading: $!"; - my $f_stat = stat($file); - while (<$fh>) { - chomp; - next unless length $_; - my ($binname, $binver, $binarch, $srcname, $srcver) = split; - # if $srcver is not defined, this is probably a broken - # .debinfo file [they were causing #686106, see commit - # 49c85ab8 in dak.] Basically, $binarch didn't get put into - # the file, so we'll fudge it from the filename. - if (not defined $srcver) { - ($srcname,$srcver) = ($binarch,$srcname); - ($binarch) = $file =~ /_([^\.]+)\.debinfo/; - } - my $sp = $s->resultset('SrcPkg')->find_or_create({pkg => $srcname}); - # update the creation date if the data we have is earlier - my $ct_date = DateTime->from_epoch(epoch => $f_stat->ctime); - if ($ct_date < $sp->creation) { - $sp->creation($ct_date); - $sp->last_modified(DateTime->now); - $sp->update; - } - my $sv = $s->resultset('SrcVer')->find_or_create({src_pkg =>$sp->id(), - ver => $srcver}); - if (not defined $sv->upload_date() or $ct_date < $sv->upload_date()) { - $sv->upload_date($ct_date); - $sv->update; - } - my $arch; - if (defined $arch{$binarch}) { - $arch = $arch{$binarch}; - } else { - $arch = $s->resultset('Arch')->find_or_create({arch => $binarch}); - $arch{$binarch} = $arch; - } - my $bp = $s->resultset('BinPkg')->find_or_create({pkg => $binname}); - $s->resultset('BinVer')->find_or_create({bin_pkg => $bp->id(), - src_ver => $sv->id(), - arch => $arch->id(), - ver => $binver, - }); - } - $p->update() if $p; + my $it = natatime 100, @files; + while (my @v = $it->()) { + my %cache; + my @debinfos; +FILE: for my $file (@v) { + my $fh = IO::File->new($file,'r') or + die "Unable to open $file for reading: $!"; + my $f_stat = stat($file); + my $ct_date = DateTime->from_epoch(epoch => $f_stat->ctime); + my @file_debinfos; + while (<$fh>) { + chomp; + next unless length $_; + my ($binname, $binver, $binarch, $srcname, $srcver) = split; + # if $srcver is not defined, this is probably a broken + # .debinfo file [they were causing #686106, see commit + # 49c85ab8 in dak.] Basically, $binarch didn't get put into + # the file, so we'll fudge it from the filename. + if (not defined $srcver) { + ($srcname,$srcver) = ($binarch,$srcname); + ($binarch) = $file =~ /_([a-z0-9-]+)\.debinfo/; + } + # It turns out that there are debinfo files which are horribly + # screwed up, and have junk in them. We need to discard them + # completely + if (not defined $srcname or + not defined $srcver or + not defined $binname or + not defined $binver or + not defined $binarch or + $srcname !~ /^$config{package_name_re}$/o or + $binname !~ /^$config{package_name_re}$/o or + $srcver !~ /^$config{package_version_re}$/o or + $binver !~ /^$config{package_version_re}$/o + ) { + print STDERR "malformed debinfo: $file\n$_\n"; + next FILE; + } + push @file_debinfos, + [$binname,$binver,$binarch,$srcname,$srcver,$ct_date]; + } + push @debinfos, + @file_debinfos; + } + $s->txn_do( + sub { + for my $di (@debinfos) { + Debbugs::DB::Load::load_debinfo($s,@{$di}[0..5],\%cache); + } + }); + $p->update($p->last_update()+@v) if $p; } $p->remove() if $p; } @@ -398,12 +468,12 @@ sub add_maintainers { my ($options,$opts,$p,$config,$argv) = @_; my $s = db_connect($options); - my $maintainers = getsourcemaintainers(); + my $maintainers = getsourcemaintainers() // {}; $p->target(2) if $p; ## get all of the maintainers, and add the missing ones my $maints = $s->resultset('Maintainer')-> get_maintainers(values %{$maintainers}); - $p->update(); + $p->update() if $p; my @svs = $s->resultset('SrcVer')-> search({maintainer => undef }, @@ -509,15 +579,12 @@ sub add_logs { } my $s = db_connect($options); - - my $time = 0; - my $start_time = time; - - walk_bugs([(@{$argv}?@{$argv} : $initialdir)], - $p, - 'log', - $verbose, - sub { + walk_bugs(dirs => [(@{$argv}?@{$argv} : $initialdir)], + progress_bar => $p, + bug_file => 'log', + $verbose?(logging => \*STDERR):(), + callback => + sub { my $bug = shift; my $stat = stat(getbugcomponent($bug,'log',$initialdir)); if (not defined $stat) { @@ -525,8 +592,10 @@ sub add_logs { next; } if ($options{quick}) { - my $rs = $s->resultset('Bug')->search({bug=>$bug})->single(); - next if defined $rs and $stat->mtime < $rs->last_modified()->epoch(); + my $rs = $s->resultset('Bug')-> + search({id=>$bug})->single(); + return if defined $rs and + $stat->mtime <= $rs->last_modified()->epoch(); } eval { load_bug_log(db => $s, @@ -538,6 +607,81 @@ sub add_logs { }); } +sub add_bugs_and_logs { + my ($options,$opts,$p,$config,$argv) = @_; + + chdir($config->{spool_dir}) or + die "chdir $config->{spool_dir} failed: $!"; + + my $verbose = $options->{debug}; + + my $initialdir = "db-h"; + + if (defined $argv->[0] and $argv->[0] eq "archive") { + $initialdir = "archive"; + } + my $s = db_connect($options); + + my %tags; + my %severities; + my %queue; + + walk_bugs(dirs => [(@{$argv}?@{$argv} : $initialdir)], + progress_bar => $p, + $verbose?(logging => \*STDERR):(), + callback => + sub { + my @bugs = @_; + my @bugs_to_update; + if ($options{quick}) { + @bugs_to_update = + bugs_to_update($s,@bugs); + } else { + @bugs_to_update = @bugs; + } + eval { + $s->txn_do(sub { + for my $bug (@bugs_to_update) { + load_bug(db => $s, + bug => $bug, + tags => \%tags, + severities => \%severities, + queue => \%queue); + } + }); + }; + if ($@) { + die "failure while trying to load bug: $@"; + } + for my $bug (@bugs) { + my $stat = stat(getbugcomponent($bug,'log',$initialdir)); + if (not defined $stat) { + print STDERR "Unable to stat $bug $!\n"; + next; + } + if ($options{quick}) { + my $rs = $s->resultset('Bug')-> + search({id=>$bug})->single(); + return if defined $rs and + $stat->mtime <= $rs->last_modified()->epoch(); + } + eval { + load_bug_log(db => $s, + bug => $bug); + }; + if ($@) { + die "failure while trying to load bug log $bug\n$@"; + } + } + }, + bugs_per_call => 50, + ); + handle_load_bug_queue(db=>$s, + queue => \%queue, + ); + +} + sub add_packages { my ($options,$opts,$p,$config,$argv) = @_; @@ -645,31 +789,11 @@ sub db_connect { my ($options) = @_; # connect to the database; figure out how to handle errors # properly here. - my $s = Debbugs::DB->connect($options->{service}) or + my $s = Debbugs::DB->connect($options->{dsn} // + $options->{service}) or die "Unable to connect to database: "; } -sub open_compressed_file { - my ($file) = @_; - my $fh; - my $mode = '<:encoding(UTF-8)'; - my @opts; - if ($file =~ /\.gz$/) { - $mode = '-|:encoding(UTF-8)'; - push @opts,'gzip','-dc'; - } - if ($file =~ /\.xz$/) { - $mode = '-|:encoding(UTF-8)'; - push @opts,'xz','-dc'; - } - if ($file =~ /\.bz2$/) { - $mode = '-|:encoding(UTF-8)'; - push @opts,'bzip2','-dc'; - } - open($fh,$mode,@opts,$file); - return $fh; -} - sub read_release_file { my ($file) = @_; # parse release @@ -700,42 +824,22 @@ sub read_release_file { return (\%dist_info,\%p_f); } -sub walk_bugs { - my ($dirs,$p,$what,$verbose,$sub) = @_; - my @dirs = @{$dirs}; - my $tot_dirs = @dirs; - my $done_dirs = 0; - my $avg_subfiles = 0; - my $completed_files = 0; - while (my $dir = shift @dirs) { - printf "Doing dir %s ...\n", $dir if $verbose; - - opendir(DIR, "$dir/.") or die "opendir $dir: $!"; - my @subdirs = readdir(DIR); - closedir(DIR); - - my @list = map { m/^(\d+)\.$what$/?($1):() } @subdirs; - $tot_dirs -= @dirs; - push @dirs, map { m/^(\d+)$/ && -d "$dir/$1"?("$dir/$1"):() } @subdirs; - $tot_dirs += @dirs; - if ($avg_subfiles == 0) { - $avg_subfiles = @list; - } - - $p->target($avg_subfiles*($tot_dirs-$done_dirs)+$completed_files+@list) if $p; - $avg_subfiles = ($avg_subfiles * $done_dirs + @list) / ($done_dirs+1); - $done_dirs += 1; - for my $bug (@list) { - $completed_files++; - $p->update($completed_files) if $p; - print "Up to $completed_files bugs...\n" if ($completed_files % 100 == 0 && $verbose); - $sub->($bug); - } +sub bugs_to_update { + my ($s,@bugs) = @_; + my @bugs_to_update; + for my $bug (@bugs) { + my $stat = stat(getbugcomponent($bug,'summary',getbuglocation($bug,'summary'))); + if (not defined $stat) { + print STDERR "Unable to stat $bug $!\n"; + next; + } + my $rs = $s->resultset('Bug')->search({id=>$bug})->single(); + next if defined $rs and $stat->mtime <= $rs->last_modified()->epoch(); + push @bugs_to_update, $bug; } - $p->remove() if $p; + @bugs_to_update; } - __END__