X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bin%2Fdebbugs-loadsql;h=b97432685882e0fc1f9b25c5593d55f19e0fe459;hb=01657bf61b205931f9e263f626f80f5b34682b0c;hp=2edcd1ec32f4c9fd4f74c948e7145f3ce0958b87;hpb=618cf51d734cf3e496e85ba81ea4afd990bcd6ac;p=debbugs.git diff --git a/bin/debbugs-loadsql b/bin/debbugs-loadsql index 2edcd1e..b974326 100755 --- a/bin/debbugs-loadsql +++ b/bin/debbugs-loadsql @@ -17,10 +17,13 @@ debbugs-loadsql -- load debbugs sql database =head1 SYNOPSIS -debbugs-loadsql [options] +debbugs-loadsql [options] [subcommand] + Subcommands: + bugs help versions configuration + suites logs packages debinfo Options: - --quick, -q only load changed bugs + --quick, -q only load changed things --progress Show progress bar --service, -s service name --sysconfdir, -c postgresql service config dir @@ -37,15 +40,45 @@ Display this manual =head2 bugs -Add bugs +Add bugs (subject, number, etc) to the database + + --preload create all bugs first, then add information =head2 versions -Add versions +Add version descendant information (which version is based on which version) to +the database =head2 maintainers -Add source maintainers +Add source maintainers to the BTS + +=head2 configuration + +Add debbugs configuration information (tags, severity, etc) + +=head2 suites + +Add suite information from ftp distribution + + --ftpdists location of FTP mirror + +=head2 logs + +Add bug logs + +=head2 packages + +Add package information from the ftp archive + + --ftpdists location of FTP mirror + --suites Suite to operate on + +=head2 debinfo + +Add package information from a debinfo file + + --null -0 names of debinfo files are null separated =head1 OPTIONS @@ -98,15 +131,22 @@ Display this manual. use vars qw($DEBUG); -use Debbugs::Common qw(checkpid lockpid get_hashname getparsedaddrs getbugcomponent make_list getsourcemaintainers); +use Debbugs::Common (qw(checkpid lockpid get_hashname getparsedaddrs getbugcomponent make_list getsourcemaintainers), + qw(hash_slice open_compressed_file),); use Debbugs::Config qw(:config); use Debbugs::Status qw(read_bug split_status_fields); use Debbugs::Log; use Debbugs::DB; -use Debbugs::DB::Load qw(load_bug handle_load_bug_queue); +use Debbugs::DB::Load qw(:load_bug :load_package :load_suite); use DateTime; use File::stat; - +use File::Basename; +use File::Spec; +use IO::Dir; +use IO::File; +use IO::Uncompress::AnyUncompress; +use Encode qw(decode_utf8); +use List::MoreUtils qw(natatime); my %options = (debug => 0, @@ -138,17 +178,28 @@ $DEBUG = $options{debug}; my %subcommands = ('bugs' => {function => \&add_bugs, + arguments => {'preload' => 0}, }, 'versions' => {function => \&add_versions, }, 'debinfo' => {function => \&add_debinfo, + arguments => {'0|null' => 0}, }, 'maintainers' => {function => \&add_maintainers, }, 'configuration' => {function => \&add_configuration, }, + 'suites' => {function => \&add_suite, + arguments => {'ftpdists=s' => 1, + }, + }, 'logs' => {function => \&add_logs, }, + 'packages' => {function => \&add_packages, + arguments => {'ftpdists=s' => 1, + 'suites=s@' => 0, + }, + }, 'help' => {function => sub {pod2usage({verbose => 2});}} ); @@ -189,9 +240,12 @@ if (not defined $subcommand) { pod2usage(); } elsif (not exists $subcommands{$subcommand}) { print STDERR "$subcommand is not a valid subcommand; displaying usage.\n"; - pod2uage(); + pod2usage(); } +binmode(STDOUT,':encoding(UTF-8)'); +binmode(STDERR,':encoding(UTF-8)'); + my $opts = handle_subcommand_arguments(\@ARGV,$subcommands{$subcommand}{arguments}); $subcommands{$subcommand}{function}->(\%options,$opts,$prog_bar,\%config,\@ARGV); @@ -213,66 +267,61 @@ sub add_bugs { my $time = 0; my $start_time = time; - - - my @dirs = (@{$argv}?@{$argv} : $initialdir); - my $cnt = 0; my %tags; my %severities; my %queue; - my $tot_dirs = @{$argv}? @{$argv} : 0; - my $done_dirs = 0; - my $avg_subfiles = 0; - my $completed_files = 0; - while (my $dir = shift @dirs) { - printf "Doing dir %s ...\n", $dir if $verbose; - - opendir(DIR, "$dir/.") or die "opendir $dir: $!"; - my @subdirs = readdir(DIR); - closedir(DIR); - - my @list = map { m/^(\d+)\.summary$/?($1):() } @subdirs; - $tot_dirs -= @dirs; - push @dirs, map { m/^(\d+)$/ && -d "$dir/$1"?("$dir/$1"):() } @subdirs; - $tot_dirs += @dirs; - if ($avg_subfiles == 0) { - $avg_subfiles = @list; - } - - $p->target($avg_subfiles*($tot_dirs-$done_dirs)+$completed_files+@list) if $p; - $avg_subfiles = ($avg_subfiles * $done_dirs + @list) / ($done_dirs+1); - $done_dirs += 1; - for my $bug (@list) { - $completed_files++; - $p->update($completed_files) if $p; - print "Up to $cnt bugs...\n" if (++$cnt % 100 == 0 && $verbose); - my $stat = stat(getbugcomponent($bug,'summary',$initialdir)); - if (not defined $stat) { - print STDERR "Unable to stat $bug $!\n"; - next; - } - if ($options{quick}) { - my $rs = $s->resultset('Bug')->search({bug=>$bug})->single(); - next if defined $rs and $stat->mtime < $rs->last_modified()->epoch(); - } - my $data = read_bug(bug => $bug, - location => $initialdir); - eval { - load_bug(db => $s, - data => split_status_fields($data), - tags => \%tags, - severities => \%severities, - queue => \%queue); - }; - if ($@) { - use Data::Dumper; - print STDERR Dumper($data) if $DEBUG; - die "failure while trying to load bug $bug\n$@"; - } - } + if ($opts->{preload}) { + my @bugs; + walk_bugs([(@{$argv}?@{$argv} : $initialdir)], + undef, + 'summary', + undef, + sub { + push @bugs,@_; + }, + 10000 + ); + $s->resultset('Bug')->quick_insert_bugs(@bugs); } - $p->remove() if $p; + walk_bugs([(@{$argv}?@{$argv} : $initialdir)], + $p, + 'summary', + $verbose, + sub { + my @bugs = @_; + my @bugs_to_update; + if ($options{quick}) { + for my $bug (@bugs) { + my $stat = stat(getbugcomponent($bug,'summary',$initialdir)); + if (not defined $stat) { + print STDERR "Unable to stat $bug $!\n"; + next; + } + my $rs = $s->resultset('Bug')->search({id=>$bug})->single(); + next if defined $rs and $stat->mtime <= $rs->last_modified()->epoch(); + push @bugs_to_update, $bug; + } + } else { + @bugs_to_update = @bugs; + } + eval { + $s->txn_do(sub { + for my $bug (@bugs_to_update) { + load_bug(db => $s, + bug => $bug, + tags => \%tags, + severities => \%severities, + queue => \%queue); + } + }); + }; + if ($@) { + die "failure while trying to load bug: $@"; + } + }, + 50 + ); handle_load_bug_queue(db => $s, queue => \%queue); } @@ -325,44 +374,52 @@ sub add_debinfo { my ($options,$opts,$p,$config,$argv) = @_; my @files = @{$argv}; + if (not @files) { + { + local $/ = "\n"; + local $/ = "\0" if $opts->{0}; + while () { + s/\n$// unless $opts->{0}; + s/\0$// if $opts->{0}; + push @files, $_; + } + } + } return unless @files; my $s = db_connect($options); - my %arch; + my %cache; $p->target(scalar @files) if $p; - for my $file (@files) { - my $fh = IO::File->new($file,'r') or - die "Unable to open $file for reading: $!"; - my $f_stat = stat($file); - while (<$fh>) { - chomp; - next unless length $_; - my ($binname, $binver, $binarch, $srcname, $srcver) = split; - # if $srcver is not defined, this is probably a broken - # .debinfo file [they were causing #686106, see commit - # 49c85ab8 in dak.] Basically, $binarch didn't get put into - # the file, so we'll fudge it from the filename. - if (not defined $srcver) { - ($srcname,$srcver) = ($binarch,$srcname); - ($binarch) = $file =~ /_([^\.]+)\.debinfo/; - } - my $sp = $s->resultset('SrcPkg')->find_or_create({pkg => $srcname}); - my $sv = $s->resultset('SrcVer')->find_or_create({src_pkg =>$sp->id(), - ver => $srcver}); - my $arch; - if (defined $arch{$binarch}) { - $arch = $arch{$binarch}; - } else { - $arch = $s->resultset('Arch')->find_or_create({arch => $binarch}); - $arch{$binarch} = $arch; - } - my $bp = $s->resultset('BinPkg')->find_or_create({pkg => $binname}); - $s->resultset('BinVer')->find_or_create({bin_pkg => $bp->id(), - src_ver => $sv->id(), - arch => $arch->id(), - ver => $binver, - }); - } - $p->update() if $p; + my $it = natatime 100, @files; + while (my @v = $it->()) { + my @debinfos; + for my $file (@v) { + my $fh = IO::File->new($file,'r') or + die "Unable to open $file for reading: $!"; + my $f_stat = stat($file); + my $ct_date = DateTime->from_epoch(epoch => $f_stat->ctime); + while (<$fh>) { + chomp; + next unless length $_; + my ($binname, $binver, $binarch, $srcname, $srcver) = split; + # if $srcver is not defined, this is probably a broken + # .debinfo file [they were causing #686106, see commit + # 49c85ab8 in dak.] Basically, $binarch didn't get put into + # the file, so we'll fudge it from the filename. + if (not defined $srcver) { + ($srcname,$srcver) = ($binarch,$srcname); + ($binarch) = $file =~ /_([^\.]+)\.debinfo/; + } + push @debinfos, + [$binname,$binver,$binarch,$srcname,$srcver,$ct_date]; + } + } + $s->txn_do( + sub { + for my $di (@debinfos) { + Debbugs::DB::Load::load_debinfo($s,@{$di}[0..5],\%cache); + } + }); + $p->update($p->last_update()+@v) if $p; } $p->remove() if $p; } @@ -372,52 +429,213 @@ sub add_maintainers { my $s = db_connect($options); my $maintainers = getsourcemaintainers(); - $p->target(scalar keys %{$maintainers}) if $p; - for my $pkg (keys %{$maintainers}) { - my $maint = $maintainers->{$pkg}; - # see if a maintainer already exists; if so, we don't do - # anything here - my $maint_r = $s->resultset('Maintainer')-> - find({name => $maint}); - if (not defined $maint_r) { - # get e-mail address of maintainer - my $addr = getparsedaddrs($maint); - my $e_mail = $addr->address(); - my $full_name = $addr->phrase(); - $full_name =~ s/^\"|\"$//g; - $full_name =~ s/^\s+|\s+$//g; - # find correspondent - my $correspondent = $s->resultset('Correspondent')-> - find_or_create({addr => $e_mail}); - if (length $full_name) { - my $c_full_name = $correspondent->find_or_create_related('correspondent_full_names', - {full_name => $full_name}) if length $full_name; - $c_full_name->update({last_seen => 'NOW()'}); - } - $maint_r = - $s->resultset('Maintainer')-> - find_or_create({name => $maint, - correspondent => $correspondent, - }); - } - # add the maintainer to the source package for packages with - # no maintainer - $s->txn_do(sub { - $s->resultset('SrcPkg')->search({pkg => $pkg})-> - search_related_rs('src_vers',{ maintainer => undef})-> - update_all({maintainer => $maint_r->id()}); - }); - $p->update() if $p; + $p->target(2) if $p; + ## get all of the maintainers, and add the missing ones + my $maints = $s->resultset('Maintainer')-> + get_maintainers(values %{$maintainers}); + $p->update(); + my @svs = $s->resultset('SrcVer')-> + search({maintainer => undef + }, + {join => 'src_pkg', + group_by => 'me.src_pkg, src_pkg.pkg', + result_class => 'DBIx::Class::ResultClass::HashRefInflator', + columns => [qw(me.src_pkg src_pkg.pkg)], + } + )->all(); + $p->target(2+@svs) if $p; + $p->update() if $p; + for my $sv (@svs) { + if (exists $maintainers->{$sv->{src_pkg}{pkg}}) { + my $pkg = $sv->{src_pkg}{pkg}; + my $maint = $maints-> + {$maintainers->{$pkg}}; + $s->txn_do(sub {$s->resultset('SrcVer')-> + search({maintainer => undef, + 'src_pkg.pkg' => $pkg + }, + {join => 'src_pkg'} + )->update({maintainer => $maint}) + }); + } + $p->update() if $p; } $p->remove() if $p; } sub add_configuration { my ($options,$opts,$p,$config,$argv) = @_; + + my $s = db_connect($options); + + # tags + # add all tags + my %tags; + for my $tag (@{$config{tags}}) { + $tags{$tag} = 1; + $s->resultset('Tag')->find_or_create({tag => $tag}); + } + # mark obsolete tags + for my $tag ($s->resultset('Tag')->search_rs()->all()) { + next if exists $tags{$tag->tag}; + $tag->obsolete(1); + $tag->update; + } + + # severities + my %sev_names; + my $order = -1; + for my $sev_name (($config{default_severity},@{$config{severity_list}})) { + # add all severitites + my $sev = $s->resultset('Severity')->find_or_create({severity => $sev_name}); + # mark strong severities + if (grep {$_ eq $sev_name} @{$config{strong_severities}}) { + $sev->strong(1); + } + $sev->ordering($order); + $sev->update(); + $order++; + $sev_names{$sev_name} = 1; + } + # mark obsolete severities + for my $sev ($s->resultset('Severity')->search_rs()->all()) { + next if exists $sev_names{$sev->severity()}; + $sev->obsolete(1); + $sev->update(); + } +} + +sub add_suite { + my ($options,$opts,$p,$config,$argv) = @_; + # suites + + my $s = db_connect($options); + my $dist_dir = IO::Dir->new($opts->{ftpdists}); + my @dist_names = + grep { $_ !~ /^\./ and + -d $opts->{ftpdists}.'/'.$_ and + not -l $opts->{ftpdists}.'/'.$_ + } $dist_dir->read; + while (my $dist = shift @dist_names) { + my $dist_dir = $opts->{ftpdists}.'/'.$dist; + my ($dist_info,$package_files) = + read_release_file($dist_dir.'/Release'); + load_suite($s,$dist_info); + } } sub add_logs { my ($options,$opts,$p,$config,$argv) = @_; + + chdir($config->{spool_dir}) or + die "chdir $config->{spool_dir} failed: $!"; + + my $verbose = $options->{debug}; + + my $initialdir = "db-h"; + + if (defined $argv->[0] and $argv->[0] eq "archive") { + $initialdir = "archive"; + } + my $s = db_connect($options); + + + my $time = 0; + my $start_time = time; + + walk_bugs([(@{$argv}?@{$argv} : $initialdir)], + $p, + 'log', + $verbose, + sub { + my $bug = shift; + my $stat = stat(getbugcomponent($bug,'log',$initialdir)); + if (not defined $stat) { + print STDERR "Unable to stat $bug $!\n"; + next; + } + if ($options{quick}) { + my $rs = $s->resultset('Bug')->search({bug=>$bug})->single(); + next if defined $rs and $stat->mtime <= $rs->last_modified()->epoch(); + } + eval { + load_bug_log(db => $s, + bug => $bug); + }; + if ($@) { + die "failure while trying to load bug log $bug\n$@"; + } + }); +} + +sub add_packages { + my ($options,$opts,$p,$config,$argv) = @_; + + my $dist_dir = IO::Dir->new($opts->{ftpdists}); + my @dist_names = + grep { $_ !~ /^\./ and + -d $opts->{ftpdists}.'/'.$_ and + not -l $opts->{ftpdists}.'/'.$_ + } $dist_dir->read; + my %s_p; + while (my $dist = shift @dist_names) { + my $dist_dir = $opts->{ftpdists}.'/'.$dist; + my ($dist_info,$package_files) = + read_release_file($dist_dir.'/Release'); + $s_p{$dist_info->{Codename}} = $package_files; + } + my $tot = 0; + for my $suite (keys %s_p) { + for my $component (keys %{$s_p{$suite}}) { + $tot += scalar keys %{$s_p{$suite}{$component}}; + } + } + $p->target($tot) if $p; + my $i = 0; + my $avg_pkgs = 0; + my $tot_suites = scalar keys %s_p; + my $done_suites=0; + my $completed_pkgs=0; + # parse packages files + for my $suite (keys %s_p) { + my @pkgs; + for my $component (keys %{$s_p{$suite}}) { + my @archs = keys %{$s_p{$suite}{$component}}; + if (grep {$_ eq 'source'} @archs) { + @archs = ('source',grep {$_ ne 'source'} @archs); + } + for my $arch (@archs) { + my $pfh = open_compressed_file($s_p{$suite}{$component}{$arch}) or + die "Unable to open $s_p{$suite}{$component}{$arch} for reading: $!"; + local $_; + local $/ = ''; # paragraph mode + while (<$pfh>) { + my %pkg; + for my $field (qw(Package Maintainer Version Source)) { + /^\Q$field\E: (.*)/m; + $pkg{$field} = $1; + } + next unless defined $pkg{Package} and + defined $pkg{Version}; + push @pkgs,[$arch,$component,\%pkg]; + } + } + } + my $s = db_connect($options); + if ($avg_pkgs==0) { + $avg_pkgs = @pkgs; + } + $p->target($avg_pkgs*($tot_suites-$done_suites-1)+ + $completed_pkgs+@pkgs) if $p; + load_packages($s, + $suite, + \@pkgs, + $p); + $avg_pkgs=($avg_pkgs*$done_suites + @pkgs)/($done_suites+1); + $completed_pkgs += @pkgs; + $done_suites++; + } + $p->remove() if $p; } sub handle_subcommand_arguments { @@ -457,10 +675,79 @@ sub db_connect { my ($options) = @_; # connect to the database; figure out how to handle errors # properly here. - my $s = Debbugs::DB->connect('dbi:Pg:service='.$options->{service}) or + my $s = Debbugs::DB->connect($options->{service}) or die "Unable to connect to database: "; } +sub read_release_file { + my ($file) = @_; + # parse release + my $rfh = open_compressed_file($file) or + die "Unable to open $file for reading: $!"; + my %dist_info; + my $in_sha1; + my %p_f; + while (<$rfh>) { + chomp; + if (s/^(\S+):\s*//) { + if ($1 eq 'SHA1'or $1 eq 'SHA256') { + $in_sha1 = 1; + next; + } + $dist_info{$1} = $_; + } elsif ($in_sha1) { + s/^\s//; + my ($sha,$size,$f) = split /\s+/,$_; + next unless $f =~ /(?:Packages|Sources)(?:\.gz|\.xz)$/; + next unless $f =~ m{^([^/]+)/([^/]+)/([^/]+)$}; + my ($component,$arch,$package_source) = ($1,$2,$3); + $arch =~ s/binary-//; + next if exists $p_f{$component}{$arch}; + $p_f{$component}{$arch} = File::Spec->catfile(dirname($file),$f); + } + } + return (\%dist_info,\%p_f); +} + +sub walk_bugs { + my ($dirs,$p,$what,$verbose,$sub,$n) = @_; + my @dirs = @{$dirs}; + my $tot_dirs = @dirs; + my $done_dirs = 0; + my $avg_subfiles = 0; + my $completed_files = 0; + $n //= 1; + while (my $dir = shift @dirs) { + printf "Doing dir %s ...\n", $dir if $verbose; + + opendir(DIR, "$dir/.") or die "opendir $dir: $!"; + my @subdirs = readdir(DIR); + closedir(DIR); + + my @list = map { m/^(\d+)\.$what$/?($1):() } @subdirs; + $tot_dirs -= @dirs; + push @dirs, map { m/^(\d+)$/ && -d "$dir/$1"?("$dir/$1"):() } @subdirs; + $tot_dirs += @dirs; + if ($avg_subfiles == 0) { + $avg_subfiles = @list; + } + + $p->target($avg_subfiles*($tot_dirs-$done_dirs)+$completed_files+@list) if $p; + $avg_subfiles = ($avg_subfiles * $done_dirs + @list) / ($done_dirs+1); + $done_dirs += 1; + + my $it = natatime $n,@list; + while (my @bugs = $it->()) { + $sub->(@bugs); + $completed_files += scalar @bugs; + $p->update($completed_files) if $p; + print "Up to $completed_files bugs...\n" + if ($completed_files % 100 == 0 && $verbose); + } + } + $p->remove() if $p; +} + __END__