Output the score of all of the messages in a bug
+=over
+
+=item B<--skip-seen>
+
+Skip messages which have previously been classified
+
+=back
+
=item B<mark-spam>
Mark messages as spam if there is a regex match to subject or message
my %subcommands =
('auto-scan' => {function => \&auto_spamscan,
- arguments => {'ham_threshold=s' => -5,
+ arguments => {'ham_threshold|ham-threshold=s' => 0,
},
+ defaults => {ham_threshold => -5},
},
'score' => {function => \&score_bug,
+ arguments => {'skip_seen|skip-seen!' => 0
+ },
},
'mark-spam' => {function => \&mark_spam,
},
chdir($config{spool_dir}) or die "chdir $config{spool_dir} failed: $!";
}
my $opts =
- handle_subcommand_arguments(\@ARGV,$subcommands{$subcommand}{arguments});
+ handle_subcommand_arguments(\@ARGV,
+ $subcommands{$subcommand}{arguments},
+ $subcommands{$subcommand}{defaults},
+ );
$subcommands{$subcommand}{function}->(\%options,$opts,\%config,\@ARGV);
my @bug_score =
spam_score_bug($bug_num,
$options->{spamc},
- $options->{spamc_opts});
+ $options->{spamc_opts},
+ $opts->{skip_seen},
+ );
print "$_->{score} $_->{message_id} $_->{subject}\n"
foreach @bug_score;
}
if ($is_spam) {
print STDERR "it's spam ($score)\n" if $DEBUG;
$spam->add_spam($mid);
- } elsif ($score < $options->{ham_threshold}) {
+ } elsif ($score < $opts->{ham_threshold}) {
print STDERR "it's really ham ($score)\n" if $DEBUG;
$spam->add_ham($mid);
}
}
sub spam_score_bug {
- my ($bug,$spamc,$spamc_opts) = @_;
+ my ($bug,$spamc,$spamc_opts,$skip_seen) = @_;
+ my $spam;
+ if ($skip_seen) {
+ $spam = Debbugs::Log::Spam->new(bug_num => $bug) or
+ die "Unable to open bug log spam for $bug";
+ }
my @records;
foreachmsg(sub {
my ($bn,$rec,$mid) = @_;
- my $score =
+ my $score;
+ if ($skip_seen) {
+ if ($spam->is_spam($mid)) {
+ $score = 999;
+ } elsif ($spam->is_ham($mid)) {
+ $score = -999;
+ }
+ }
+ $score //=
spam_score($rec,$spamc,$spamc_opts);
+ my ($subject) = $rec->{text} =~ /^Subject: *(.+)$/mi;
push @records,
{message_id => $mid,
score => $score,
- subject => ($rec->{text} =~ /^Subject: *(.+)/i)[0],
+ subject => $subject,
};
},
$bug
if (not $childpid) {
die "Unable to fork spamc";
}
- print {$spamc} $record->{text};
- close($spamc) or die "Unable to close spamc: $!";
+ print {$spamc_in} $record->{text};
+ close($spamc_in) or die "Unable to close spamc_in: $!";
waitpid($childpid,0);
- if ($DEBUG) {
- print STDERR "[$?;".($? >> 8)."] ";
- print STDERR map {s/\n//; $_ } <$spamc_out>;
- print STDERR " ";
- }
- close($spamc_out);
- $SIG{"PIPE"} = $old_sig;
if ($? >> 8) {
$is_spam = 1;
}
my ($first_line,@report) = <$spamc_out>;
+ if ($DEBUG) {
+ print STDERR "[$?;".($? >> 8)."] ";
+ print STDERR $first_line,@report;
+ print STDERR " ";
+ }
if (defined $first_line) {
chomp $first_line;
($score,$threshold) = $first_line =~ m{^(-?[\d\.]+)/(-?[\d\.]+)$};
$report = join('',@report);
}
+ close($spamc_out);
+ $SIG{"PIPE"} = $old_sig;
};
if ($@) {
carp "processing of message failed [$@]\n";
my ($msg_id) = record_regex($record,
qr/^Message-Id:\s+<(.+)>/mi);
next unless defined $msg_id;
+ print STDERR "examining $msg_id: " if $DEBUG;
if ($msg_id =~ /$config{email_domain}$/) {
- print STDERR "skipping $msg_id\n" if $DEBUG;
+ print STDERR "skipping\n" if $DEBUG;
next;
}
- print STDERR "examining $msg_id: " if $DEBUG;
if ($seen_msgids{$msg_id}) {
print STDERR "already seen\n" if $DEBUG;
next;
}
$seen_msgids{$msg_id}=1;
$sub->($bug_num,$record,$msg_id);
+ print STDERR "\n" if $DEBUG;
}
}