1 # This module is part of debbugs, and is released
2 # under the terms of the GPL version 2, or any later
3 # version at your option.
4 # See the file README and COPYING for more information.
6 # Copyright 2007 by Don Armstrong <don@donarmstrong.com>.
12 Debbugs::Bugs -- Bug selection routines for debbugs
16 use Debbugs::Bugs qw(get_bugs);
21 This module is a replacement for all of the various methods of
22 selecting different types of bugs.
24 It implements a single function, get_bugs, which defines the master
25 interface for selecting bugs.
27 It attempts to use subsidiary functions to actually do the selection,
28 in the order specified in the configuration files. [Unless you're
29 insane, they should be in order from fastest (and often most
30 incomplete) to slowest (and most complete).]
41 use vars qw($VERSION $DEBUG %EXPORT_TAGS @EXPORT_OK @EXPORT);
42 use Exporter qw(import);
46 $DEBUG = 0 unless defined $DEBUG;
50 @EXPORT_OK = (qw(get_bugs count_bugs newest_bug bug_filter));
51 $EXPORT_TAGS{all} = [@EXPORT_OK];
54 use Debbugs::Config qw(:config);
55 use Params::Validate qw(validate_with :types);
57 use Debbugs::Status qw(splitpackages get_bug_status);
58 use Debbugs::Packages qw(getsrcpkgs getpkgsrc);
59 use Debbugs::Common qw(getparsedaddrs package_maintainer getmaintainers make_list hash_slice);
60 use Fcntl qw(O_RDONLY);
61 use MLDBM qw(DB_File Storable);
62 use List::AllUtils qw(first max);
71 The following parameters can either be a single scalar or a reference
72 to an array. The parameters are ANDed together, and the elements of
73 an arrayref parameter are ORed. Future versions of this may allow
74 for limited regular expressions, and/or more complex expressions.
78 =item package -- name of the binary package
80 =item src -- name of the source package
82 =item maint -- address of the maintainer
84 =item submitter -- address of the submitter
86 =item severity -- severity of the bug
88 =item status -- status of the bug
92 =item owner -- owner of the bug
94 =item correspondent -- address of someone who sent mail to the log
96 =item affects -- bugs which affect this package
98 =item dist -- distribution (I don't know about this one yet)
100 =item bugs -- list of bugs to search within
102 =item function -- see description below
106 =head3 Special options
108 The following options are special options used to modulate how the
109 searches are performed.
113 =item archive -- whether to search archived bugs or normal bugs;
114 defaults to false. As a special case, if archive is 'both', both
115 archived and unarchived bugs are returned.
117 =item usertags -- set of usertags and the bugs they are applied to
122 =head3 Subsidiary routines
124 All subsidiary routines get passed exactly the same set of options as
125 get_bugs. If for some reason they are unable to handle the options
126 passed (for example, they don't have the right type of index for the
127 type of selection) they should die as early as possible. [Using
128 Params::Validate and/or die when files don't exist makes this fairly
131 This function will then immediately move on to the next subroutine,
132 giving it the same arguments.
136 This option allows you to provide an arbitrary function which will be
137 given the information in the index.db file. This will be super, super
138 slow, so only do this if there's no other way to write the search.
140 You'll be given a list (which you can turn into a hash) like the
143 (pkg => ['a','b'], # may be a scalar (most common)
146 submitter => 'boo@baz.com',
147 severity => 'serious',
148 tags => ['a','b','c'], # may be an empty arrayref
151 The function should return 1 if the bug should be included; 0 if the
# Option names that tune how a search runs rather than naming something to
# search for. get_bugs_by_idx and get_bugs_by_db exclude keys matching this
# pattern when they count (and, for the idx routine, iterate over) search keys.
156 state $_non_search_key_regex = qr/^(bugs|archive|usertags|schema)$/;
# Params::Validate specification shared by the get_bugs* routines: each entry
# names an accepted option and the value types permitted for it. Most search
# options accept either a single scalar or an array reference.
158 my %_get_bugs_common_options =
159 (package => {type => SCALAR|ARRAYREF,
162 src => {type => SCALAR|ARRAYREF,
165 maint => {type => SCALAR|ARRAYREF,
168 submitter => {type => SCALAR|ARRAYREF,
171 severity => {type => SCALAR|ARRAYREF,
174 status => {type => SCALAR|ARRAYREF,
177 tag => {type => SCALAR|ARRAYREF,
180 owner => {type => SCALAR|ARRAYREF,
183 dist => {type => SCALAR|ARRAYREF,
186 correspondent => {type => SCALAR|ARRAYREF,
189 affects => {type => SCALAR|ARRAYREF,
# Caller-supplied filter callback; must be a code reference at validation time.
192 function => {type => CODEREF,
195 bugs => {type => SCALAR|ARRAYREF,
# SCALAR is accepted in addition to BOOLEAN because get_bugs compares this
# value against the string "both".
198 archive => {type => BOOLEAN|SCALAR,
201 usertags => {type => HASHREF,
204 newest => {type => SCALAR|ARRAYREF,
# Schema object handed to get_bugs_by_db, which calls resultset() on it.
207 schema => {type => OBJECT,
# get_bugs itself validates against a copy of the complete common specification.
213 state $_get_bugs_options = {%_get_bugs_common_options};
# Body of get_bugs (the sub line is not among the lines shown here).
# Validates the arguments against the full option spec, then hands the same
# options to each backend routine in turn.
215 my %param = validate_with(params => \@_,
216 spec => $_get_bugs_options,
220 my %options = %param;
# archive => "both": recurse once for unarchived and once for archived bugs
# and pool the two result lists.
222 if ($options{archive} eq 'both') {
223 push @bugs, get_bugs(%options,archive=>0);
224 push @bugs, get_bugs(%options,archive=>1);
# Hash slice keyed by bug number, so a bug returned by both recursive calls
# collapses into a single key of %bugs.
226 @bugs{@bugs} = @bugs;
229 # A configuration option will set an array that we'll use here instead.
# Backends are tried in this fixed order: database, by-*.idx indexes, flat file.
230 for my $routine (qw(Debbugs::Bugs::get_bugs_by_db Debbugs::Bugs::get_bugs_by_idx Debbugs::Bugs::get_bugs_flatfile)) {
# Greedy match: everything before the final "::" is the package name.
231 my ($package) = $routine =~ m/^(.+)\:\:/;
# String eval so that a package named only at run time can be loaded.
232 eval "use $package;";
234 # We output errors here because using an invalid function
235 # in the configuration file isn't something that should
237 warn "use $package failed with $@";
# The string interpolates to a call of the routine with the lexical %options
# flattened into its argument list; a die inside the routine is trapped by the
# eval and its message left in $@.
240 @bugs = eval "${routine}(\%options)";
243 # We don't output errors here, because failure here
244 # via die may be a perfectly normal thing.
245 print STDERR "$@" if $DEBUG;
250 # If no one succeeded, die
259 count_bugs(function => sub {...})
261 Uses a subroutine to classify bugs into categories and return the
262 number of bugs which fall into those categories
# Body of count_bugs (the sub line is not among the lines shown here).
# Reads an index file line by line, passes the parsed fields of each bug to the
# caller-supplied function, and tallies every value that function returns.
267 my %param = validate_with(params => \@_,
268 spec => {function => {type => CODEREF,
270 archive => {type => BOOLEAN,
# archive true selects index.archive, otherwise index.db, both under spool_dir.
# NOTE(review): IO::File is used here but no "use IO::File" appears among the
# import lines visible in this listing -- confirm it is loaded.
276 if ($param{archive}) {
277 $flatfile = IO::File->new("$config{spool_dir}/index.archive", 'r')
278 or die "Unable to open $config{spool_dir}/index.archive for reading: $!";
281 $flatfile = IO::File->new("$config{spool_dir}/index.db", 'r')
282 or die "Unable to open $config{spool_dir}/index.db for reading: $!";
# Seven captures per index line; get_bugs_flatfile assigns the same positions
# to package, bug number, time, status, submitter, severity and tags.
286 if (m/^(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s+\[\s*([^]]*)\s*\]\s+(\w+)\s+(.*)$/) {
287 my @x = $param{function}->(pkg => $1,
# Each returned value is treated as a category name and its counter bumped.
295 $count{$_}++ foreach @x;
304 my $bug = newest_bug();
306 Returns the bug number of the newest bug, which is nextnumber-1.
# Body of newest_bug (the sub line is not among the lines shown here).
# Reads the nextnumber file in the spool directory and returns one less than
# the number found on its first line.
311 my $nn_fh = IO::File->new("$config{spool_dir}/nextnumber",'r')
# NOTE(review): the path in this message lacks the "/" separator that the open
# call above uses, so the reported file name is wrong. Message left unchanged.
312 or die "Unable to open $config{spool_dir}nextnumber for reading: $!";
# Scalar-context readline: only the first line of the file is read.
314 my $next_number = <$nn_fh>;
# Numeric subtraction; the newest existing bug is one below the next number.
317 return $next_number-1;
324 Allows filtering bugs on commonly used criteria
# Body of bug_filter (the sub line is not among the lines shown here).
# Judging by the visible returns, 1 means the bug is filtered out; the final
# return value is not among the lines shown -- confirm.
331 my %param = validate_with(params => \@_,
332 spec => {bug => {type => ARRAYREF|SCALAR,
335 status => {type => HASHREF|ARRAYREF,
338 seen_merged => {type => HASHREF,
341 repeat_merged => {type => BOOLEAN,
344 include => {type => HASHREF,
347 exclude => {type => HASHREF,
350 min_days => {type => SCALAR,
353 max_days => {type => SCALAR,
# Suppressing repeated merged bugs needs the seen_merged hash to record which
# merged sets have already been let through.
# NOTE(review): croak is called below but no Carp import is visible in this
# listing -- confirm it is imported.
358 if (exists $param{repeat_merged} and
359 not $param{repeat_merged} and
360 not defined $param{seen_merged}) {
361 croak "repeat_merged false requires seen_merged to be passed";
363 if (not exists $param{bug} and not exists $param{status}) {
364 croak "one of bug or status must be passed";
# No status supplied: locate the bug and read its status; return 0 when either
# step yields nothing.
# NOTE(review): getbuglocation and readbug are not in any import list visible
# in this listing -- confirm they are imported.
367 if (not exists $param{status}) {
368 my $location = getbuglocation($param{bug}, 'summary');
369 return 0 if not defined $location or not length $location;
370 $param{status} = readbug( $param{bug}, $location );
371 return 0 if not defined $param{status};
# include: filter the bug out when none of the include criteria match.
374 if (exists $param{include}) {
375 return 1 if (!__bug_matches($param{include}, $param{status}));
# exclude: filter the bug out when any exclude criterion matches.
377 if (exists $param{exclude}) {
378 return 1 if (__bug_matches($param{exclude}, $param{status}));
380 if (exists $param{repeat_merged} and not $param{repeat_merged}) {
# This bug plus every bug listed in its mergedwith field, sorted numerically.
381 my @merged = sort {$a<=>$b} $param{bug}, split(/ /, $param{status}{mergedwith});
# Filter out if any member of the merged set was recorded earlier ...
382 return 1 if first {defined $_} @{$param{seen_merged}}{@merged};
# ... otherwise record the whole set as seen.
383 @{$param{seen_merged}}{@merged} = (1) x @merged;
385 my $daysold = int((time - $param{status}{date}) / 86400); # seconds to days
# Filter out bugs younger than min_days.
386 if (exists $param{min_days}) {
387 return 1 unless $param{min_days} <= $daysold;
# Filter out bugs older than max_days; a max_days of -1 disables this bound.
389 if (exists $param{max_days}) {
390 return 1 unless $param{max_days} == -1 or
391 $param{max_days} >= $daysold;
397 =head2 get_bugs_by_idx
399 This routine uses the by-$index.idx indices to try to speed up
# Option spec for get_bugs_by_idx: the subset of the common options named in
# the qw() lists below.
406 state $_get_bugs_by_idx_options =
407 {hash_slice(%_get_bugs_common_options,
408 (qw(package submitter severity tag archive),
409 qw(owner src maint bugs correspondent),
410 qw(affects usertags newest))
# Body of get_bugs_by_idx (the sub line is not among the lines shown here).
# For each search key it opens the matching by-KEY index file, collects the
# bugs listed under the requested values, and finally keeps only the bugs that
# matched every key.
414 my %param = validate_with(params => \@_,
415 spec => $_get_bugs_by_idx_options
419 # If we're given an empty maint (unmaintained packages), we can't
420 # handle it, so bail out here
421 for my $maint (make_list(exists $param{maint}?$param{maint}:[])) {
422 if (defined $maint and $maint eq '') {
423 die "Can't handle empty maint (unmaintained packages) in get_bugs_by_idx";
# newest => N: compute the range of the N highest bug numbers, using the
# largest value when several are passed.
426 if ($param{newest}) {
427 my $newest_bug = newest_bug();
428 my @bugs = ($newest_bug - max(make_list($param{newest})) + 1) .. $newest_bug;
# The new bugs list starts with any bugs the caller supplied (the rest of this
# statement is not among the lines shown here).
429 $param{bugs} = [exists $param{bugs}?make_list($param{bugs}):(),
433 # We handle src packages, maint and maintenc by mapping to the
434 # appropriate binary packages, then removing all packages which
435 # don't match all queries
436 my @packages = __handle_pkg_src_and_maint(map {exists $param{$_}?($_,$param{$_}):()}
437 qw(package src maint)
# After the mapping, src and maint are dropped and package carries the result,
# so the three options count as a single search key below.
439 if (exists $param{package} or
440 exists $param{src} or
441 exists $param{maint}) {
442 delete @param{qw(maint src)};
443 $param{package} = [@packages];
# grep in scalar context: the number of real search keys.
445 my $keys = grep {$_ !~ $_non_search_key_regex} keys(%param);
446 die "Need at least 1 key to search by" unless $keys;
# Archived searches read the by-*-arc.idx files instead of by-*.idx.
447 my $arc = $param{archive} ? '-arc':'';
449 for my $key (grep {$_ !~ $_non_search_key_regex} keys %param) {
# The submitter key is stored under the index name submitter-email.
451 $index = 'submitter-email' if $key eq 'submitter';
452 $index = "$config{spool_dir}/by-${index}${arc}.idx";
453 tie(%idx, MLDBM => $index, O_RDONLY)
454 or die "Unable to open $index: $!";
# Tracks bugs already counted for this key, so a bug matching several values
# of the same key is counted only once.
455 my %bug_matching = ();
456 for my $search (make_list($param{$key})) {
457 for my $bug (keys %{$idx{$search}||{}}) {
458 next if $bug_matching{$bug};
459 # increment the number of searches that this bug matched
461 $bug_matching{$bug}=1;
# Also look up the lower-cased form of the search value when it differs.
463 if ($search ne lc($search)) {
464 for my $bug (keys %{$idx{lc($search)}||{}}) {
465 next if $bug_matching{$bug};
466 # increment the number of searches that this bug matched
468 $bug_matching{$bug}=1;
# For the tag key, bugs listed in the usertags hash under any requested tag
# count as matches too.
472 if ($key eq 'tag' and exists $param{usertags}) {
473 for my $bug (make_list(grep {defined $_ } @{$param{usertags}}{make_list($param{tag})})) {
474 next if $bug_matching{$bug};
476 $bug_matching{$bug}=1;
479 untie %idx or die 'Unable to untie %idx';
483 for my $bug (make_list($param{bugs})) {
487 # Throw out results that do not match all of the search specifications
# A bug is kept only when its match count reaches the number of search keys.
488 return map {$keys <= $bugs{$_}?($_):()} keys %bugs;
492 =head2 get_bugs_by_db
494 This routine uses the database to try to speed up
# Option spec for get_bugs_by_db: the subset of the common options named in
# the qw() lists below, plus a schema object.
500 state $_get_bugs_by_db_options =
501 {hash_slice(%_get_bugs_common_options,
502 (qw(package submitter severity tag archive),
503 qw(owner src maint bugs correspondent),
504 qw(affects usertags newest))
506 schema => {type => OBJECT,
# Body of get_bugs_by_db (the sub line is not among the lines shown here).
# Starts from the Bug resultset of the schema and narrows it with one search()
# call per supplied option, then returns the id column of what remains.
# NOTE(review): the resultset/search/get_column calls look like DBIx::Class --
# confirm against the schema class.
510 my %param = validate_with(params => \@_,
511 spec => $_get_bugs_by_db_options,
515 my $s = $param{schema};
# grep in scalar context: the number of real search keys.
516 my $keys = grep {$_ !~ $_non_search_key_regex} keys(%param);
517 die "Need at least 1 key to search by" unless $keys;
518 my $rs = $s->resultset('Bug');
# An array reference of values is passed so that any listed severity matches.
519 if (exists $param{severity}) {
520 $rs = $rs->search({'severity.severity' =>
521 [make_list($param{severity})],
523 {join => 'severity'},
# Address-valued options are matched on the addr column of the relation that
# shares the option name.
# NOTE(review): done is not among the option names sliced into the spec above,
# so that iteration looks unreachable unless the spec admits it on a line not
# shown -- confirm.
526 for my $key (qw(owner submitter done)) {
527 if (exists $param{$key}) {
528 $rs = $rs->search({"${key}.addr" =>
529 [make_list($param{$key})],
# newest => N: order by creation time, newest first, and cap the row count at
# the largest value passed.
535 if (exists $param{newest}) {
538 {order_by => {-desc => 'me.creation'},
539 rows => max(make_list($param{newest})),
# correspondent: build a Message resultset restricted to the given addresses
# and require the bug to have a message whose id is in that sub-query.
543 if (exists $param{correspondent}) {
545 $s->resultset('Message')->
546 search({'correspondent.addr' =>
547 [make_list($param{correspondent})],
549 {join => {message_correspondents => 'correspondent'},
551 group_by => ['me.id'],
554 $rs = $rs->search({'bug_messages.message' =>
555 {-in => $message_rs->get_column('id')->as_query()},
557 {join => 'bug_messages',
561 if (exists $param{affects}) {
562 my @aff_list = make_list($param{affects});
# Strip a leading src: prefix from each value in the local copy.
563 s/^src:// foreach @aff_list;
564 $rs = $rs->search({-or => {'bin_pkg.pkg' =>
568 'me.unknown_affects' =>
572 {join => [{bug_affects_binpackages => 'bin_pkg'},
573 {bug_affects_srcpackages => 'src_pkg'},
# package: match either a known binary package name or the unknown_packages
# column of the bug.
578 if (exists $param{package}) {
579 $rs = $rs->search({-or => {'bin_pkg.pkg' =>
580 [make_list($param{package})],
581 'me.unknown_packages' =>
582 [make_list($param{package})]},
584 {join => {bug_binpackages => 'bin_pkg'}});
586 if (exists $param{maint}) {
# An empty maintainer string is replaced by undef.
# NOTE(review): presumably so that it matches packages with no maintainer
# address -- confirm.
588 map {$_ eq '' ? undef : $_}
589 make_list($param{maint});
# Binary packages whose maintainer address is in the list ...
591 $s->resultset('BinPkg')->
592 search({'correspondent.addr' => [@maint_list]},
593 {join => {bin_vers =>
595 {maintainer => 'correspondent'}}},
597 group_by => ['me.id'],
# ... and source packages likewise.
601 $s->resultset('SrcPkg')->
602 search({'correspondent.addr' => [@maint_list]},
603 {join => {src_vers =>
604 {maintainer => 'correspondent'}},
606 group_by => ['me.id'],
# Keep bugs attached to any of those binary or source packages.
609 $rs = $rs->search({-or => {'bug_binpackages.bin_pkg' =>
610 { -in => $bin_pkgs_rs->get_column('id')->as_query},
611 'bug_srcpackages.src_pkg' =>
612 { -in => $src_pkgs_rs->get_column('id')->as_query},
615 {join => ['bug_binpackages',
620 if (exists $param{src}) {
621 # identify all of the srcpackages and binpackages that match first
623 $s->resultset('SrcPkg')->
624 search({-or => [map {('me.pkg' => $_,
626 make_list($param{src})],
629 group_by => ['me.id'],
633 $s->resultset('BinPkg')->
634 search({-or => [map {('src_pkg.pkg' => $_,
636 make_list($param{src})],
638 {join => {bin_vers => {src_ver => 'src_pkg'}},
640 group_by => ['me.id'],
# Keep bugs attached to a matching binary or source package, or naming the
# source in unknown_packages.
642 $rs = $rs->search({-or => {'bug_binpackages.bin_pkg' =>
643 { -in => $bin_pkgs_rs->get_column('id')->as_query},
644 'bug_srcpackages.src_pkg' =>
645 { -in => $src_pkgs_rs->get_column('id')->as_query},
646 'me.unknown_packages' =>
647 [make_list($param{src})],
650 {join => ['bug_binpackages',
655 # tags are very odd, because we must handle usertags.
656 if (exists $param{tag}) {
657 # bugs from usertags which matter
658 my %bugs_matching_usertags;
# Collect the bugs listed in the usertags hash under each requested tag name;
# tag names with no usertags entry are skipped by the defined test.
659 for my $bug (make_list(grep {defined $_ }
660 @{$param{usertags}}{make_list($param{tag})})) {
661 $bugs_matching_usertags{$bug} = 1;
663 # we want all bugs which either match the tag name given in
664 # param, or have a usertag set which matches one of the tag
665 # names given in param.
666 $rs = $rs->search({-or => {map {('tag.tag' => $_)}
667 make_list($param{tag}),
668 map {('me.id' => $_)}
669 keys %bugs_matching_usertags
672 {join => {bug_tags => 'tag'}});
# bugs: restrict the result to the listed bug ids.
674 if (exists $param{bugs}) {
675 $rs = $rs->search({-or => {map {('me.id' => $_)}
676 make_list($param{bugs})}
# The archived column is only constrained when archive was given and is not
# the string both.
680 if (defined $param{archive} and $param{archive} ne 'both') {
681 $rs = $rs->search({'me.archived' => $param{archive}});
# Returns the ids of every remaining Bug row as a list.
683 return $rs->get_column('id')->all();
687 =head2 get_bugs_flatfile
689 This is the fallback search routine. It should be able to complete all
690 searches. [Or at least, that's the idea.]
# Option spec for get_bugs_flatfile: every common option except dist.
694 state $_get_bugs_flatfile_options =
695 {hash_slice(%_get_bugs_common_options,
696 map {$_ eq 'dist'?():($_)} keys %_get_bugs_common_options
# Fallback search: scans the flat index file line by line and applies every
# supplied criterion to each bug in turn.
700 sub get_bugs_flatfile{
701 my %param = validate_with(params => \@_,
702 spec => $_get_bugs_flatfile_options
# newest => N: compute the range of the N highest bug numbers, using the
# largest value when several are passed.
705 if ($param{newest}) {
706 my $newest_bug = newest_bug();
707 my @bugs = ($newest_bug - max(make_list($param{newest})) + 1) .. $newest_bug;
# The new bugs list starts with any bugs the caller supplied (the rest of this
# statement is not among the lines shown here).
708 $param{bugs} = [exists $param{bugs}?make_list($param{bugs}):(),
# archive true selects index.archive, otherwise index.db, both under spool_dir.
712 if ($param{archive}) {
713 $flatfile = IO::File->new("$config{spool_dir}/index.archive", 'r')
714 or die "Unable to open $config{spool_dir}/index.archive for reading: $!";
717 $flatfile = IO::File->new("$config{spool_dir}/index.db", 'r')
718 or die "Unable to open $config{spool_dir}/index.db for reading: $!";
721 if (exists $param{tag} and exists $param{usertags}) {
722 # This complex slice makes a hash with the bugs which have the
723 # usertags passed in $param{tag} set.
# NOTE(review): the right operand of x is evaluated in scalar context, so the
# number of 1s produced depends on what make_list returns there -- confirm
# that it yields the element count.
724 @usertag_bugs{make_list(@{$param{usertags}}{make_list($param{tag})})
725 } = (1) x make_list(@{$param{usertags}}{make_list($param{tag})});
727 my $unmaintained_packages = 0;
728 # unmaintained packages is a special case
729 my @maints = make_list(exists $param{maint}?$param{maint}:[]);
731 for my $maint (@maints) {
# An empty maint string requests unmaintained packages; the flag makes sure
# the extra filter function is installed only once.
732 if (defined $maint and $maint eq '' and not $unmaintained_packages) {
733 $unmaintained_packages = 1;
734 our %maintainers = %{getmaintainers()};
# function becomes an array reference: any existing function(s) first, then a
# filter that accepts a bug when one of its packages has no maintainer entry.
# NOTE(review): ref() is also true for a code reference, which is the only
# type validation admits for function; applying @{...} to it would fail --
# confirm whether function can be combined with an empty maint.
735 $param{function} = [(exists $param{function}?
736 (ref $param{function}?@{$param{function}}:$param{function}):()),
738 foreach my $try (make_list($d{"pkg"})) {
739 next unless length $try;
# Drop an optional leading src: prefix before the maintainer lookup.
740 ($try) = $try =~ m/^(?:src:)?(.+)/;
741 return 1 if not exists $maintainers{$try};
747 elsif (defined $maint and $maint ne '') {
748 push @{$param{maint}},$maint;
751 # We handle src packages, maint and maintenc by mapping to the
752 # appropriate binary packages, then removing all packages which
753 # don't match all queries
754 my @packages = __handle_pkg_src_and_maint(map {exists $param{$_}?($_,$param{$_}):()}
755 qw(package src maint)
# src and maint are removed once mapped; package is replaced only when the
# mapping produced at least one package.
757 if (exists $param{package} or
758 exists $param{src} or
759 exists $param{maint}) {
760 delete @param{qw(maint src)};
761 $param{package} = [@packages] if @packages;
765 if (exists $param{bugs}) {
766 $bugs{$_} = 1 for make_list($param{bugs});
769 # These queries have to be handled by get_bugs_by_idx
770 if (exists $param{owner}
771 or exists $param{correspondent}
772 or exists $param{affects}) {
773 $bugs{$_} = 1 for get_bugs_by_idx(map {exists $param{$_}?($_,$param{$_}):()}
774 qw(owner correspondent affects),
# Main scan: one index line per bug; lines that do not parse are skipped.
779 BUG: while (<$flatfile>) {
780 next unless m/^(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s+\[\s*(.*)\s*\]\s+(\w+)\s+(.*)$/;
781 my ($pkg,$bug,$time,$status,$submitter,$severity,$tags) = ($1,$2,$3,$4,$5,$6,$7);
# When a candidate set of bugs was built above, skip every bug outside it.
782 next if $grep_bugs and not exists $bugs{$bug};
# package: at least one package of the bug must equal a requested package.
783 if (exists $param{package}) {
784 my @packages = splitpackages($pkg);
785 next unless grep { my $pkg_list = $_;
786 grep {$pkg_list eq $_} make_list($param{package})
# NOTE(review): src is deleted from %param above whenever it was supplied, so
# this branch looks unreachable unless a line not shown restores it -- confirm.
789 if (exists $param{src}) {
790 my @src_packages = map { getsrcpkgs($_)} make_list($param{src});
791 my @packages = splitpackages($pkg);
792 next unless grep { my $pkg_list = $_;
793 grep {$pkg_list eq $_} @packages
# submitter: compare parsed e-mail addresses.
# NOTE(review): the requested addresses are lower-cased but the address from
# the index line is not, so a mixed-case address in the index would never
# match -- confirm whether that is intended.
796 if (exists $param{submitter}) {
797 my @p_addrs = map {lc($_->address)}
798 map {getparsedaddrs($_)}
799 make_list($param{submitter});
800 my @f_addrs = map {$_->address}
801 getparsedaddrs($submitter||'');
802 next unless grep { my $f_addr = $_;
803 grep {$f_addr eq $_} @p_addrs
# severity and status: exact string match against any requested value.
806 next if exists $param{severity} and not grep {$severity eq $_} make_list($param{severity});
807 next if exists $param{status} and not grep {$status eq $_} make_list($param{status});
808 if (exists $param{tag}) {
810 # either a normal tag, or a usertag must be set
811 $bug_ok = 1 if exists $param{usertags} and $usertag_bugs{$bug};
812 my @bug_tags = split ' ', $tags;
813 $bug_ok = 1 if grep {my $bug_tag = $_;
814 grep {$bug_tag eq $_} make_list($param{tag});
818 # We do this last, because a function may be slow...
819 if (exists $param{function}) {
820 my @bug_tags = split ' ', $tags;
821 my @packages = splitpackages($pkg);
# The callback receives pkg as an array reference when the bug has several
# packages, and as a plain scalar when it has exactly one.
822 my $package = (@packages > 1)?\@packages:$packages[0];
823 for my $function (make_list($param{function})) {
825 $function->(pkg => $package,
828 submitter => $submitter,
829 severity => $severity,
839 =head1 PRIVATE FUNCTIONS
841 =head2 __handle_pkg_src_and_maint
843 my @packages = __handle_pkg_src_and_maint(map {exists $param{$_}?($_,$param{$_}):()}
844 qw(package src maint)
847 Turn package/src/maint into a list of packages
# Combines the package, src and maint criteria into one list of package names.
# Each criterion adds at most one to the counter of every package it selects;
# only packages whose counter reaches the number of criteria in play
# ($package_keys) are returned.
851 sub __handle_pkg_src_and_maint{
852 my %param = validate_with(params => \@_,
853 spec => {package => {type => SCALAR|ARRAYREF,
856 src => {type => SCALAR|ARRAYREF,
859 maint => {type => SCALAR|ARRAYREF,
867 @packages = make_list($param{package}) if exists $param{package};
# The package criterion counts as one key when it named at least one package.
868 my $package_keys = @packages?1:0;
# Hash slice on %packages (distinct from the array @packages): every
# explicitly requested package starts with a count of 1.
870 @packages{@packages} = (1) x @packages;
871 if (exists $param{src}) {
872 # We only want to increment the number of keys if there is
875 # in case there are binaries with the same name as the
# %_temp_p guards the increment, so a package is counted at most once for
# this criterion.
878 for my $package ((map {getsrcpkgs($_)} make_list($param{src}))) {
879 $packages{$package}++ unless exists $_temp_p{$package};
880 $_temp_p{$package} = 1;
# Each source package is also counted under its src:-prefixed name.
883 for my $package (make_list($param{src})) {
884 $packages{"src:$package"}++ unless exists $_temp_p{"src:$package"};
885 $_temp_p{"src:$package"} = 1;
887 # As a temporary hack, we will also include $param{src}
888 # in this list for packages passed which do not have a
889 # corresponding binary package
890 if (not exists getpkgsrc()->{$package}) {
891 $packages{$package}++ unless exists $_temp_p{$package};
892 $_temp_p{$package} = 1;
# $key_inc is set on lines not shown here.
895 $package_keys += $key_inc;
897 if (exists $param{maint}) {
# Count every package returned by package_maintainer for the given
# maintainer(s), once per package.
900 for my $package (package_maintainer(maintainer=>$param{maint})) {
901 $packages{$package}++ unless exists $_temp_p{$package};
902 $_temp_p{$package} = 1;
905 $package_keys += $key_inc;
# Keep the packages selected by every criterion that contributed a key.
907 return grep {$packages{$_} >= $package_keys} keys %packages;
# Dispatch table used by __bug_matches: maps a status field name to the
# matcher applied to it. Every matcher is called with the field name, an array
# reference of wanted values, and the status hash.
910 state $field_match = {
911 'subject' => \&__contains_field_match,
# Anonymous matcher (its key is on a line not shown here): true when any
# whitespace-separated token of the field equals one of the wanted values.
913 my ($field, $values, $status) = @_;
914 my %values = map {$_=>1} @$values;
915 foreach my $t (split /\s+/, $status->{$field}) {
916 return 1 if (defined $values{$t});
920 'severity' => \&__exact_field_match,
921 'pending' => \&__exact_field_match,
922 'package' => \&__exact_field_match,
923 'originator' => \&__contains_field_match,
924 'forwarded' => \&__contains_field_match,
925 'owner' => \&__contains_field_match,
# Body of __bug_matches (the sub line is not among the lines shown here).
# $hash maps field names to wanted values and $status is the status hash of
# the bug. Returns 1 as soon as the matcher for any field reports a match.
929 my ($hash, $status) = @_;
930 foreach my $key( keys( %$hash ) ) {
931 my $value = $hash->{$key};
# Fields with no entry in the dispatch table are silently ignored.
932 next unless exists $field_match->{$key};
933 my $sub = $field_match->{$key};
# An entry that exists but holds undef is treated as a fatal error.
934 if (not defined $sub) {
935 die "No defined subroutine for key: $key";
937 return 1 if ($sub->($key, $value, $status));
# Matcher for whole-value comparison: selects the wanted values that are
# string-equal to the named status field. The return statement is on a line
# not shown here.
942 sub __exact_field_match {
943 my ($field, $values, $status) = @_;
944 my @values = @$values;
945 my @ret = grep {$_ eq $status->{$field} } @values;
# Matcher for substring comparison: returns 1 when any wanted value occurs
# somewhere inside the named status field (case-sensitive, via index).
949 sub __contains_field_match {
950 my ($field, $values, $status) = @_;
951 foreach my $data (@$values) {
952 return 1 if (index($status->{$field}, $data) > -1);