]> git.donarmstrong.com Git - debbugs.git/blob - Debbugs/DB/Load.pm
delete associations in a transaction by 1000
[debbugs.git] / Debbugs / DB / Load.pm
1 # This module is part of debbugs, and is released
2 # under the terms of the GPL version 2, or any later version. See the
3 # file README and COPYING for more information.
4 # Copyright 2013 by Don Armstrong <don@donarmstrong.com>.
5
6 package Debbugs::DB::Load;
7
8 =head1 NAME
9
10 Debbugs::DB::Load -- Utility routines for loading the database
11
12 =head1 SYNOPSIS
13
14
15 =head1 DESCRIPTION
16
17
18 =head1 BUGS
19
20 None known.
21
22 =cut
23
24 use warnings;
25 use strict;
26 use vars qw($VERSION $DEBUG %EXPORT_TAGS @EXPORT_OK @EXPORT);
27 use base qw(Exporter);
28
BEGIN{
     # $VERSION is extracted from the Revision keyword when the VCS has
     # expanded it ("Revision: NNN"); it stays undefined otherwise. The
     # capture must take the whole revision token, not just its first
     # character.
     ($VERSION) = q$Revision$ =~ /^Revision:\s+(\S+)/;
     $DEBUG = 0 unless defined $DEBUG;

     # nothing is exported by default; everything is requested by tag
     @EXPORT = ();
     %EXPORT_TAGS = (load_bug    => [qw(load_bug handle_load_bug_queue load_bug_log)],
                     load_debinfo => [qw(load_debinfo)],
                     load_package => [qw(load_packages)],
                     load_suite => [qw(load_suite)],
                    );
     @EXPORT_OK = ();
     # every name mentioned in a tag becomes individually exportable
     Exporter::export_ok_tags(keys %EXPORT_TAGS);
     # :all is the union of all of the tags above
     $EXPORT_TAGS{all} = [@EXPORT_OK];
}
43
44 use Params::Validate qw(validate_with :types);
45 use List::MoreUtils qw(natatime);
46
47 use Debbugs::Status qw(read_bug split_status_fields);
48 use Debbugs::DB;
49 use DateTime;
50 use Debbugs::Common qw(make_list getparsedaddrs);
51 use Debbugs::Config qw(:config);
52 use Debbugs::MIME qw(parse_to_mime_entity decode_rfc1522);
53 use DateTime::Format::Mail;
54 use Carp;
55
56 =head2 Bug loading
57
Routines to load bugs; exported with :load_bug
59
60 =over
61
62 =item load_bug
63
64      load_bug(db => $schema,
65               data => split_status_fields($data),
66               tags => \%tags,
67               queue => \%queue);
68
69 Loads a bug's metadata into the database. (Does not load any messages)
70
71 =over
72
73 =item db -- Debbugs::DB object
74
75 =item data -- Bug data (from read_bug) which has been split with split_status_fields
76
77 =item tags -- tag cache (hashref); optional
78
79 =item queue -- queue of operations to perform after bug is loaded; optional.
80
81 =back
82
83 =cut
84
# Load (create or update) the metadata of a single bug in the database.
# Messages are not loaded here.
#
# Named parameters:
#   db         -- schema object; resultset() and txn_do() are called on it
#   data       -- bug data hashref; optional if bug is given
#   bug        -- bug number handed to read_bug() when data is absent
#   tags       -- cache of tag name => Tag row, filled in as tags are seen
#   severities -- cache of severity name => Severity row
#   packages   -- cache handed through to set_related_packages()
#   queue      -- hashref collecting merge/block relations which reference
#                 bugs that are not in the database yet; when it is not
#                 passed, such relations are handled before returning
#
# Croaks if neither data nor bug is given or if the bug cannot be read;
# dies if the bug row cannot be updated or created.
sub load_bug {
    my %param = validate_with(params => \@_,
                              spec => {db => {type => OBJECT,
                                             },
                                       data => {type => HASHREF,
                                                optional => 1,
                                               },
                                       bug => {type => SCALAR,
                                               optional => 1,
                                              },
                                       tags => {type => HASHREF,
                                                default => sub {return {}},
                                                optional => 1},
                                       severities => {type => HASHREF,
                                                      default => sub {return {}},
                                                      optional => 1,
                                                     },
                                       queue => {type => HASHREF,
                                                 optional => 1},
                                       packages => {type => HASHREF,
                                                    default => sub {return {}},
                                                    optional => 1,
                                                   },
                                      });
    my $s = $param{db};
    if (not exists $param{data} and not exists $param{bug}) {
        croak "One of data or bug must be provided to load_bug";
    }
    if (not exists $param{data}) {
        $param{data} = read_bug(bug => $param{bug});
        # NOTE(review): read_bug presumably returns undef for a bug it
        # cannot read; fail here instead of much later on missing fields.
        croak "Unable to read bug $param{bug}"
            unless defined $param{data};
    }
    my $data = $param{data};
    my $tags = $param{tags};
    my $queue = $param{queue};
    my $severities = $param{severities};
    # without a caller-supplied queue, deferred work is done before returning
    my $can_queue = 1;
    if (not defined $queue) {
        $can_queue = 0;
        $queue = {};
    }
    my %tags;
    $data = split_status_fields($data);
    for my $tag (make_list($data->{keywords})) {
        next unless defined $tag and length $tag;
        # this allows for invalid tags. But we'll use this to try to
        # find those bugs and clean them up
        if (not exists $tags->{$tag}) {
            $tags->{$tag} = $s->resultset('Tag')->
            find_or_create({tag => $tag});
        }
        $tags{$tag} = $tags->{$tag};
    }
    # fall back to the configured default when the bug has no severity
    my $severity =
        (defined $data->{severity} and length($data->{severity})) ?
        $data->{severity} : $config{default_severity};
    if (not exists $severities->{$severity}) {
        $severities->{$severity} =
            $s->resultset('Severity')->
            find_or_create({severity => $severity},
                          );
    }
    $severity = $severities->{$severity};
    my $bug =
        {id => $data->{bug_num},
         creation => DateTime->from_epoch(epoch => $data->{date}),
         log_modified => DateTime->from_epoch(epoch => $data->{log_modified}),
         last_modified => DateTime->from_epoch(epoch => $data->{last_modified}),
         archived => $data->{archived},
         # unarchived is only set when the bug has an unarchived time
         (defined $data->{unarchived} and length($data->{unarchived}))?
         (unarchived => DateTime->from_epoch(epoch => $data->{unarchived})):(),
         forwarded => $data->{forwarded} // '',
         summary => $data->{summary} // '',
         outlook => $data->{outlook} // '',
         subject => $data->{subject} // '',
         done_full => $data->{done} // '',
         severity => $severity,
         owner_full => $data->{owner} // '',
         submitter_full => $data->{originator} // '',
        };
    # bug column => field of $data holding the corresponding address
    my %addr_map =
        (done => 'done',
         owner => 'owner',
         submitter => 'originator',
        );
    for my $addr_type (keys %addr_map) {
        my $address = $data->{$addr_map{$addr_type}};
        $bug->{$addr_type} = undef;
        next unless defined $address and length($address);
        $bug->{$addr_type} =
            $s->resultset('Correspondent')->
            get_correspondent_id($address);
    }
    my $bug_row = $s->resultset('Bug')->update_or_create($bug) or
        die "Unable to update or create bug $bug->{id}";
    $s->txn_do(sub {
                 # entries starting with src: are source packages, all
                 # other non-empty entries are binary packages
                 my %related =
                     (package => ['binpackages','srcpackages'],
                      affects => ['affects_binpackages','affects_srcpackages'],
                     );
                 for my $field (qw(package affects)) {
                     my ($bin_rel,$src_rel) = @{$related{$field}};
                     $bug_row->set_related_packages($bin_rel,
                                                    [grep {defined $_ and
                                                             length $_ and $_ !~ /^src:/}
                                                     make_list($data->{$field})],
                                                    $param{packages},
                                                   );
                     $bug_row->set_related_packages($src_rel,
                                                    [grep {defined $_ and
                                                             $_ =~ /^src:/}
                                                     make_list($data->{$field})],
                                                    $param{packages},
                                                   );
                 }
                 # synchronize the found/fixed versions with the database
                 for my $ff (qw(found fixed)) {
                     my $is_found = $ff eq 'found'?1:0;
                     my @elements = $s->resultset('BugVer')->search({bug => $data->{bug_num},
                                                                     found  => $is_found,
                                                                    });
                     my %existing = map {($_->ver_string(),$_)} @elements;
                     my %keep;
                     my %elements_to_add;
                     for my $version (@{$data->{"${ff}_versions"} // []}) {
                         if (exists $existing{$version}) {
                             $keep{$version} = 1;
                         } else {
                             $elements_to_add{$version} = 1;
                         }
                     }
                     # rows whose version is no longer listed are removed
                     for my $ver_string (grep {not $keep{$_}} keys %existing) {
                         $existing{$ver_string}->delete();
                     }
                     for my $element (keys %elements_to_add) {
                         my $ne = $s->resultset('BugVer')->new_result({bug => $data->{bug_num},
                                                                       ver_string => $element,
                                                                       found => $is_found,
                                                                      }
                                                                     );
                         # find source package and source version id for
                         # versions of the form package/version
                         if (my ($src_pkg,$src_ver) = $element =~ m{^([^\/]+)/(.+)$}) {
                             my $src_pkg_e = $s->resultset('SrcPkg')->single({pkg => $src_pkg});
                             if (defined $src_pkg_e) {
                                 $ne->src_pkg($src_pkg_e->id());
                                 my $src_ver_e = $s->resultset('SrcVer')->single({src_pkg => $src_pkg_e->id(),
                                                                                  ver => $src_ver
                                                                                 });
                                 $ne->src_ver($src_ver_e->id()) if defined $src_ver_e;
                             }
                         }
                         $ne->insert();
                     }
                 }
               });
    ### set bug tags
    $s->txn_do(sub {$bug_row->set_tags([values %tags ] )});
    # because these bugs reference other bugs which might not exist
    # yet, we can't handle them until we've loaded all bugs. queue
    # them up.
    #
    # The queue key has to be one of the types handle_load_bug_queue
    # knows about (merged, blocks); using "block" here meant that block
    # relations were never loaded.
    # NOTE(review): assumes the status field for blocks is named
    # "blocks" (as "mergedwith" is for merges) -- confirm against
    # Debbugs::Status.
    my %relation_field = (merged => 'mergedwith',
                          blocks => 'blocks',
                         );
    for my $relation (qw(merged blocks)) {
        my @related_bugs = @{$data->{$relation_field{$relation}}||[]};
        next unless @related_bugs;
        my $count = $s->resultset('Bug')->search({id => [@related_bugs]})->count();
        if ($count == @related_bugs) {
            # all referenced bugs already exist; no need to defer
            handle_load_bug_queue(db=>$s,
                                  queue => {$relation,
                                           {$data->{bug_num},[@related_bugs]}
                                           });
        } else {
            $queue->{$relation}{$data->{bug_num}} = [@related_bugs];
        }
    }

    if (not $can_queue and keys %{$queue}) {
        handle_load_bug_queue(db => $s,queue => $queue);
    }

    # still need to handle merges, versions, etc.
}
273
274 =item handle_load_bug_queue
275
276      handle_load_bug_queue(db => $schema,queue => $queue);
277
Handles a queue of operations created by load_bug. These operations
are used to handle cases where a bug referenced by a loaded bug may
not exist yet. When load_bug is called without a queue, it handles
any such operations itself before returning.
282
283 =cut
284
# Apply the deferred merge/block relations collected in a queue.
#
# Named parameters:
#   db    -- schema object; resultset(), populate() and txn_do() are
#            called on it
#   queue -- hashref of the form {TYPE => {BUG => [RELATED_BUG, ...]}}
#            where TYPE is "merged" or "blocks"
#
# For each queued bug the existing rows of that relation are deleted and
# replaced by the queued ones, inside one transaction per bug.
sub handle_load_bug_queue{
    my %param = validate_with(params => \@_,
                              spec => {db => {type => OBJECT,
                                             },
                                       queue => {type => HASHREF,
                                                },
                                      });
    my $s = $param{db};
    my $queue = $param{queue};
    # result source and column layout used for each queue type
    my %queue_types =
        (merged => {set => 'BugMerged',
                    columns => [qw(bug merged)],
                    bug => 'bug',
                   },
         blocks => {set => 'BugBlock',
                    columns => [qw(bug blocks)],
                    bug => 'bug',
                   },
        );
    for my $queue_type (keys %queue_types) {
        my $qt = $queue_types{$queue_type};
        my $pending = $queue->{$queue_type};
        # do not autovivify queue types which were never queued
        next unless defined $pending;
        # iterate over the bug numbers only; iterating over the flattened
        # hash would also treat each list of related bugs as a "bug" and
        # delete the relations of all of those bugs
        for my $bug (keys %{$pending}) {
            my @related = @{$pending->{$bug}//[]};
            $s->txn_do(sub {
                           $s->resultset($qt->{set})->search({$qt->{bug},$bug})->delete();
                           $s->populate($qt->{set},[[@{$qt->{columns}}],
                                                    map {[$bug,$_]} @related]) if
                               @related;
                       }
                      );
        }
    }
}
317
318 =item load_bug_log -- load bug logs
319
320        load_bug_log(db  => $s,
321                     bug => $bug);
322
323
324 =over
325
326 =item db -- database 
327
328 =item bug -- bug whose log should be loaded
329
330 =back
331
332 =cut
333
# Load the incoming messages of a bug log into the database and link
# them to the bug.
#
# Named parameters:
#   db    -- schema object; resultset() and txn_do() are called on it
#   bug   -- number of the bug whose log should be loaded
#   queue -- accepted for symmetry with load_bug; not used here
#
# Dies if the log cannot be opened.
sub load_bug_log {
    my %param = validate_with(params => \@_,
                              spec => {db => {type => OBJECT,
                                             },
                                       bug => {type => SCALAR,
                                              },
                                       queue => {type => HASHREF,
                                                 optional => 1},
                                      });
    my $s = $param{db};
    my $msg_num=0;
    my %seen_msg_ids;
    my $log = Debbugs::Log->new(bug_num => $param{bug}) or
        die "Unable to open log for $param{bug} for reading: $!";
    # unfold a header: a newline followed by whitespace collapses to a
    # single whitespace character, a trailing newline is dropped
    my $unfold = sub {
        my ($value) = @_;
        return $value unless defined $value;
        $value =~ s/\n(?:(\s)\s*|\s*$)/$1 \/\/ ''/ge;
        return $value;
    };
    while (my $record = $log->read_record()) {
        # number every record of the log so that each linked message
        # gets its own message_number instead of all of them being 0
        # NOTE(review): numbering follows the position of the record in
        # the log -- confirm this is the numbering consumers expect
        $msg_num++;
        next unless $record->{type} eq 'incoming-recv';
        my ($msg_id) = $record->{text} =~ /^Message-Id:\s+<(.+)>/im;
        # only the first copy of a message is loaded
        next if defined $msg_id and exists $seen_msg_ids{$msg_id};
        $seen_msg_ids{$msg_id} = 1 if defined $msg_id;
        # skip messages whose id looks like an ack generated by the handler
        next if defined $msg_id and $msg_id =~ /handler\..+\.ack(?:info)?\@/;
        my $entity = parse_to_mime_entity($record);
        # search for a message with this message id in the database
        $msg_id = $entity->head->get('Message-Id:');
        if (defined $msg_id) {
            $msg_id =~ s/^\s*\<//;
            $msg_id =~ s/>\s*$//;
        }
        # check to see if the subject, to, and from match. if so, it's
        # probably the same message.
        my $subject = $unfold->(decode_rfc1522($entity->head->get('Subject:')));
        my $to = $unfold->(decode_rfc1522($entity->head->get('To:')));
        my $from = $unfold->(decode_rfc1522($entity->head->get('From:')));
        my $m = $s->resultset('Message')->
            find({msgid => $msg_id,
                  from_complete => $from,
                  to_complete => $to,
                  subject => $subject
                 });
        if (not defined $m) {
            # if not, create a new message
            $m = $s->resultset('Message')->
                find_or_create({msgid => $msg_id,
                                from_complete => $from,
                                to_complete => $to,
                                subject => $subject
                               });
            # best effort: an unparsable Date: leaves sent_date unset
            eval {
                $m->sent_date(DateTime::Format::Mail->
                              parse_datetime($entity->head->get('Date:',0)));
            };
            my $spam = $entity->head->get('X-Spam-Status:',0);
            if (defined $spam and $spam =~ /score=([\d\.]+)/) {
                $m->spam_score($1);
            }
            my %corr;
            @{$corr{from}} = getparsedaddrs($from);
            @{$corr{to}} = getparsedaddrs($to);
            @{$corr{cc}} = getparsedaddrs($entity->head->get('Cc:'));
            # add correspondents if necessary
            my @cors;
            for my $type (keys %corr) {
                for my $addr (@{$corr{$type}}) {
                    push @cors,
                        {correspondent => $s->resultset('Correspondent')->
                         get_correspondent_id($addr),
                         correspondent_type => $type,
                        };
                }
            }
            $m->update();
            $s->txn_do(sub {
                           $m->message_correspondents()->delete();
                           $m->add_to_message_correspondents(@cors);
                       }
                      );
        }
        # record the address named in the first Received: header as the
        # receiving correspondent
        my $recv;
        my $received = $entity->head->get('Received:',0);
        if (defined $received and
            $received =~ /via spool by (\S+)/) {
            $recv = $s->resultset('Correspondent')->
                get_correspondent_id($1);
            $m->add_to_message_correspondents({correspondent=>$recv,
                                               correspondent_type => 'recv'});
        }
        # link message to bugs if necessary
        $m->find_or_create_related('bug_messages',
                                  {bug=>$param{bug},
                                   message_number => $msg_num});
    }

}
426
427 =back
428
429 =head2 Debinfo
430
431 Commands to handle src and package version loading from debinfo files
432
433 =over
434
435 =item load_debinfo
436
437      load_debinfo($schema,$binname, $binver, $binarch, $srcname, $srcver);
438
439
440
441 =cut
442
# Record one binary package version together with the source package
# version and architecture it belongs to. Every row involved is looked up
# with find_or_create(), so existing rows are reused. Returns whatever the
# final find_or_create() on the BinVer result set returns.
sub load_debinfo {
    my ($schema,$binname, $binver, $binarch, $srcname, $srcver) = @_;
    # look up (or insert) a row of the named result source
    my $row_for = sub {
        my ($source,$columns) = @_;
        return $schema->resultset($source)->find_or_create($columns);
    };
    my $src_pkg = $row_for->('SrcPkg',{pkg => $srcname});
    my $src_ver = $row_for->('SrcVer',{src_pkg => $src_pkg->id(),
                                       ver => $srcver});
    my $arch_row = $row_for->('Arch',{arch => $binarch});
    my $bin_pkg = $row_for->('BinPkg',{pkg => $binname});
    return $row_for->('BinVer',{bin_pkg_id => $bin_pkg->id(),
                                src_ver_id => $src_ver->id(),
                                arch_id    => $arch_row->id(),
                                ver        => $binver,
                               });
}
456
457
458 =back
459
460 =head2 Packages
461
462 =over
463
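=item load_packages

     load_packages($schema,$suite,$pkgs,$p)

Loads the packages of a suite. C<$pkgs> is an array reference of
C<[$arch,$component,$pkg]> tuples, where C<$pkg> is a hash reference
with (at least) the Package, Version and Maintainer fields, and
optionally Source. C<$p> is an optional progress object whose update
method is called as packages are handled.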
467
468 =cut
469
# Synchronize the source and binary package versions associated with a
# suite with the given list of packages.
#
#   $schema -- schema object; resultset() and txn_do() are called on it
#   $suite  -- codename of the suite (the Suite row is created if needed)
#   $pkgs   -- arrayref of [$arch,$component,$pkg] tuples; $pkg is a
#              hashref with Package, Version, Maintainer and optionally
#              Source; $arch is 'source' for source packages
#   $p      -- optional progress object; update() and last_update() are
#              called on it
#
# Associations for versions which are in $pkgs but not yet in the suite
# are added (in transactions of 100), associations for versions which are
# no longer in $pkgs are deleted (in transactions of 1000), and the
# modified column of all associations of the suite is updated.
sub load_packages {
    my ($schema,$suite,$pkgs,$p) = @_;
    my $suite_id = $schema->resultset('Suite')->
        find_or_create({codename => $suite})->id;
    my %maints;
    # source package => version => maintainer
    my %sources;
    # architecture => binary package => description of the binary version
    my %bins;
    for my $pkg_tuple (@{$pkgs}) {
        my ($arch,$component,$pkg) = @{$pkg_tuple};
        $maints{$pkg->{Maintainer}} = $pkg->{Maintainer};
        if ($arch eq 'source') {
            my $source = $pkg->{Package};
            my $source_ver = $pkg->{Version};
            $sources{$source}{$source_ver} = $pkg->{Maintainer};
        } else {
            # a binary package without Source: is built from the source
            # package of the same name and version
            my $source = $pkg->{Source} // $pkg->{Package};
            my $source_ver = $pkg->{Version};
            # Source: may be of the form "name (version)"
            if ($source =~ /^\s*(\S+) \(([^\)]+)\)\s*$/) {
                ($source,$source_ver) = ($1,$2);
            }
            $sources{$source}{$source_ver} = $pkg->{Maintainer};
            $bins{$arch}{$pkg->{Package}} =
               {arch => $arch,
                bin => $pkg->{Package},
                bin_ver => $pkg->{Version},
                src_ver => $source_ver,
                source  => $source,
                maint   => $pkg->{Maintainer},
               };
        }
    }
    # Retrieve and Insert new maintainers
    my $maints =
        $schema->resultset('Maintainer')->
        get_maintainers(keys %maints);
    my $archs =
        $schema->resultset('Arch')->
        get_archs(keys %bins);
    # We want all of the source package/versions which are in this suite to
    # start with
    my @sa_to_add;
    my @sa_to_del;
    my %included_sa;
    # Calculate which source packages are no longer in this suite
    for my $in_suite ($schema->resultset('SrcPkg')->
                      src_pkg_and_ver_in_suite($suite)) {
        my $src_pkg = $in_suite->{pkg};
        my $src_ver = $in_suite->{src_vers}{ver};
        if (not exists $sources{$src_pkg} or
            not exists $sources{$src_pkg}{$src_ver}
           ) {
            push @sa_to_del,
                $in_suite->{src_associations}{id};
        }
        # key by the version string, not by the src_vers hashref itself
        $included_sa{$src_pkg}{$src_ver} = 1;
    }
    # Calculate which source packages are newly in this suite
    for my $src_pkg (keys %sources) {
        for my $src_ver (keys %{$sources{$src_pkg}}) {
            # a version is new if either the package or this particular
            # version of it is not yet in the suite
            if (not exists $included_sa{$src_pkg} or
                not $included_sa{$src_pkg}{$src_ver}) {
                push @sa_to_add,
                    [$src_pkg,$src_ver,$sources{$src_pkg}{$src_ver}];
            } else {
                $p->update() if defined $p;
            }
        }
    }
    # add new source packages
    my $it = natatime 100, @sa_to_add;
    while (my @v = $it->()) {
        $schema->txn_do(
            sub {
                for my $svm (@_) {
                    my $s_id = $schema->resultset('SrcPkg')->
                        get_src_pkg_id($svm->[0]);
                    my $sv_id = $schema->resultset('SrcVer')->
                        get_src_ver_id($s_id,$svm->[1],$maints->{$svm->[2]});
                    $schema->resultset('SrcAssociation')->
                        insert_suite_src_ver_association($suite_id,$sv_id);
                }
            },
                        @v
                       );
        $p->update($p->last_update()+
                   scalar @v) if defined $p;
    }
    # remove associations for packages not in this suite
    if (@sa_to_del) {
        $it = natatime 1000, @sa_to_del;
        while (my @v = $it->()) {
            $schema->
                txn_do(sub {
                           $schema->resultset('SrcAssociation')->
                               search_rs({id => \@v})->
                               delete();
                       });
        }
    }
    # update packages in this suite to have a modification time of now
    # NOTE(review): 'NOW()' is passed as a plain value, not as literal
    # SQL -- confirm that the database interprets it as intended
    $schema->resultset('SrcAssociation')->
        search_rs({suite => $suite_id})->
        update({modified => 'NOW()'});
    ## Handle binary packages
    my @bin_to_del;
    my @bin_to_add;
    my %included_bin;
    # calculate which binary packages are no longer in this suite
    for my $in_suite ($schema->resultset('BinPkg')->
                      bin_pkg_and_ver_in_suite($suite)) {
        my $bin_arch = $in_suite->{arch}{arch};
        my $bin_pkg = $in_suite->{pkg};
        my $bin_ver = $in_suite->{bin_vers}{ver};
        if (not exists $bins{$bin_arch} or
            not exists $bins{$bin_arch}{$bin_pkg} or
            ($bins{$bin_arch}{$bin_pkg}{bin_ver} ne
             $bin_ver
            )
           ) {
            push @bin_to_del,
                $in_suite->{bin_associations}{id};
        }
        $included_bin{$bin_arch}{$bin_pkg} =
            $bin_ver;
    }
    # calculate which binary packages are newly in this suite
    for my $bin_arch (keys %bins) {
        for my $pkg (keys %{$bins{$bin_arch}}) {
            if (not exists $included_bin{$bin_arch} or
                not exists $included_bin{$bin_arch}{$pkg} or
                $bins{$bin_arch}{$pkg}{bin_ver} ne
                $included_bin{$bin_arch}{$pkg}) {
                push @bin_to_add,
                    $bins{$bin_arch}{$pkg};
            } else {
                $p->update() if defined $p;
            }
        }
    }
    # add new binary packages
    $it = natatime 100, @bin_to_add;
    while (my @v = $it->()) {
        $schema->txn_do(
        sub {
            for my $bvm (@_) {
                my $s_id = $schema->resultset('SrcPkg')->
                    get_src_pkg_id($bvm->{source});
                my $sv_id = $schema->resultset('SrcVer')->
                    get_src_ver_id($s_id,$bvm->{src_ver},$maints->{$bvm->{maint}});
                my $b_id = $schema->resultset('BinPkg')->
                    get_bin_pkg_id($bvm->{bin});
                my $bv_id = $schema->resultset('BinVer')->
                    get_bin_ver_id($b_id,$bvm->{bin_ver},
                                   $archs->{$bvm->{arch}},$sv_id);
                $schema->resultset('BinAssociation')->
                    insert_suite_bin_ver_association($suite_id,$bv_id);
            }
        },
                        @v
                       );
        $p->update($p->last_update()+
                   scalar @v) if defined $p;
    }
    # remove associations for binary packages not in this suite
    if (@bin_to_del) {
        $it = natatime 1000, @bin_to_del;
        while (my @v = $it->()) {
            $schema->
                txn_do(sub {
                           $schema->resultset('BinAssociation')->
                               search_rs({id => \@v})->
                               delete();
                       });
        }
    }
    $schema->resultset('BinAssociation')->
        search_rs({suite => $suite_id})->
        update({modified => 'NOW()'});

}
660
661
662 =back
663
664 =cut
665
666 =head2 Suites
667
668 =over
669
670 =item load_suite
671
672      load_suite($schema,$codename,$suite,$version,$active);
673
674 =cut
675
# Create or update the row of a suite and return it.
#
# Called either with positional values
#     load_suite($schema,$codename,$suite,$version,$active)
# or with a reference in place of $codename, in which case the Codename,
# Suite and Version entries of that hash are used and the suite is marked
# active.
sub load_suite {
    my ($schema,$codename,$suite,$version,$active) = @_;
    if (ref($codename)) {
        my $release = $codename;
        $codename = $release->{Codename};
        $suite = $release->{Suite};
        $version = $release->{Version};
        $active = 1;
    }
    my $suite_row = $schema->resultset('Suite')->
        find_or_create({codename => $codename});
    # accessor => value, applied in this order before the row is saved
    my @settings = ([suite_name => $suite],
                    [version    => $version],
                    [active     => $active],
                   );
    for my $setting (@settings) {
        my ($accessor,$value) = @{$setting};
        $suite_row->$accessor($value);
    }
    $suite_row->update();
    return $suite_row;

}
691
692 =back
693
694 =cut
695
696 1;
697
698
699 __END__
700 # Local Variables:
701 # indent-tabs-mode: nil
702 # cperl-indent-level: 4
703 # End: