Output the score of all of the messages in a bug
+=over
+
+=item B<--skip-seen> Do not rescore messages which have previously been classified; they are still listed, with a score of 999 (spam) or -999 (ham)
+
+=back
+
=item B<mark-spam>
Mark messages as spam if there is a regex match to subject or message
use Debbugs::Log::Spam;
use Debbugs::Config qw(:config);
use Debbugs::Command qw(:all);
+use Debbugs::Common qw(getbuglocation);
use IPC::Open3 qw(open3);
use Carp;
man => 0,
verbose => 0,
quiet => 0,
- quick => 0,
spamc => 'spamc',
spamc_opts => [],
);
handle_main_arguments(\%options,
- 'quick|q',
'service|s',
'sysconfdir|c',
'spamc=s' => 0,
'spamc_opts|spamc-opts=s@' => 0,
'spool_dir|spool-dir=s',
+ 'quiet|q:+',
+ 'verbose|v:+',
'debug|d+','help|h|?','man|m');
my %subcommands =
('auto-scan' => {function => \&auto_spamscan,
- arguments => {'ham_threshold=s' => -5,
+ arguments => {'ham_threshold|ham-threshold=s' => 0,
+ 'skip_missing|skip-missing!' => 0,
},
+ defaults => {ham_threshold => -5,
+ skip_missing => 0,
+ },
},
'score' => {function => \&score_bug,
+ arguments => {'skip_seen|skip-seen!' => 0
+ },
},
'mark-spam' => {function => \&mark_spam,
},
chdir($config{spool_dir}) or die "chdir $config{spool_dir} failed: $!";
}
my $opts =
- handle_subcommand_arguments(\@ARGV,$subcommands{$subcommand}{arguments});
+ handle_subcommand_arguments(\@ARGV,
+ $subcommands{$subcommand}{arguments},
+ $subcommands{$subcommand}{defaults},
+ );
$subcommands{$subcommand}{function}->(\%options,$opts,\%config,\@ARGV);
if ($mid =~ /\Q$regex\E/) {
$is_match = 1;
}
- if ($spam_ham eq 'spam') {
- $spam->add_spam($mid);
- } else {
- $spam->add_ham($mid);
+ if ($is_match) {
+ print STDERR "it's a match" if $DEBUG;
+ if ($spam_ham eq 'spam') {
+ $spam->add_spam($mid);
+ } else {
+ $spam->add_ham($mid);
+ }
}
},
$bug_num
);
+ $spam->save();
}
}
for my $bug_num (@{$argv}) {
my @bug_score =
spam_score_bug($bug_num,
- $opts->{spamc},
- $opts->{spamc_opts});
+ $options->{spamc},
+ $options->{spamc_opts},
+ $opts->{skip_seen},
+ );
print "$_->{score} $_->{message_id} $_->{subject}\n"
foreach @bug_score;
}
my ($options,$opts,$config,$argv) = @_;
for my $bug_num (@{$argv}) {
+ if ($opts->{skip_missing} and
+ not defined getbuglocation($bug_num,'log')) {
+ print STDERR "bug $bug_num does not exist\n" if $options->{verbose} > -1;
+ next;
+ }
my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
die "Unable to open bug log spam for $bug_num";
foreachmsg(sub {
if ($is_spam) {
print STDERR "it's spam ($score)\n" if $DEBUG;
$spam->add_spam($mid);
- } elsif ($score < $options->{ham_threshold}) {
+ } elsif ($score < $opts->{ham_threshold}) {
print STDERR "it's really ham ($score)\n" if $DEBUG;
$spam->add_ham($mid);
}
}
sub spam_score_bug {
- my ($bug,$spamc,$spamc_opts) = @_;
+ my ($bug,$spamc,$spamc_opts,$skip_seen) = @_;
+ my $spam;
+ if ($skip_seen) {
+ $spam = Debbugs::Log::Spam->new(bug_num => $bug) or
+ die "Unable to open bug log spam for $bug";
+ }
my @records;
foreachmsg(sub {
my ($bn,$rec,$mid) = @_;
- my $score =
+ my $score;
+ if ($skip_seen) {
+ if ($spam->is_spam($mid)) {
+ $score = 999;
+ } elsif ($spam->is_ham($mid)) {
+ $score = -999;
+ }
+ }
+ $score //=
spam_score($rec,$spamc,$spamc_opts);
+ my ($subject) = $rec->{text} =~ /^Subject: *(.+)$/mi;
push @records,
{message_id => $mid,
score => $score,
- subject => ($rec->{text} =~ /^Subject: *(.+)/i)[0],
+ subject => $subject,
};
},
$bug
if (not $childpid) {
die "Unable to fork spamc";
}
- print {$spamc} $record->{text};
- close($spamc) or die "Unable to close spamc: $!";
+ print {$spamc_in} $record->{text};
+ close($spamc_in) or die "Unable to close spamc_in: $!";
waitpid($childpid,0);
- if ($DEBUG) {
- print STDERR "[$?;".($? >> 8)."] ";
- print STDERR map {s/\n//; $_ } <$spamc_out>;
- print STDERR " ";
- }
- close($spamc_out);
- $SIG{"PIPE"} = $old_sig;
if ($? >> 8) {
$is_spam = 1;
}
my ($first_line,@report) = <$spamc_out>;
+ if ($DEBUG) {
+ print STDERR "[$?;".($? >> 8)."] ";
+ print STDERR $first_line,@report;
+ print STDERR " ";
+ }
if (defined $first_line) {
chomp $first_line;
($score,$threshold) = $first_line =~ m{^(-?[\d\.]+)/(-?[\d\.]+)$};
$report = join('',@report);
}
+ close($spamc_out);
+ $SIG{"PIPE"} = $old_sig;
};
if ($@) {
carp "processing of message failed [$@]\n";
my ($msg_id) = record_regex($record,
qr/^Message-Id:\s+<(.+)>/mi);
next unless defined $msg_id;
+ print STDERR "examining $msg_id: " if $DEBUG;
if ($msg_id =~ /$config{email_domain}$/) {
- print STDERR "skipping $msg_id\n" if $DEBUG;
+ print STDERR "skipping\n" if $DEBUG;
next;
}
- print STDERR "examining $msg_id: " if $DEBUG;
if ($seen_msgids{$msg_id}) {
print STDERR "already seen\n" if $DEBUG;
next;
}
$seen_msgids{$msg_id}=1;
$sub->($bug_num,$record,$msg_id);
+ print STDERR "\n" if $DEBUG;
}
}