Output the score of all of the messages in a bug
+=over
+
+=item B<--skip-seen>
+
+Skip messages which have previously been classified
+
+=back
+
=item B<mark-spam>
Mark messages as spam if there is a regex match to subject or message
Mark messages as ham if there is a regex match to subject or message
id
+=item B<learn>
+
+Train the spam filter on messages which have already been marked as ham or spam
+
=back
=head1 EXAMPLES
+Start spamd:
+
+ /usr/sbin/spamd --socketpath=/home/debbugs/spamd_socket \
+ --nouser-config --cf='include /home/debbugs/.spamassassin/user_prefs' \
+ --cf='allow_user_rules 1' --allow-tell;
+
+Then score bugs:
+
+ debbugs-spam --spamc-opts '-U' --spamc-opts '/home/debbugs/spamd_socket' \
+ score 859123;
=cut
my %subcommands =
('auto-scan' => {function => \&auto_spamscan,
- arguments => {'ham_threshold=s' => -5,
+ arguments => {'ham_threshold|ham-threshold=s' => 0,
},
+ defaults => {ham_threshold => -5},
},
'score' => {function => \&score_bug,
+ arguments => {'skip_seen|skip-seen!' => 0
+ },
},
'mark-spam' => {function => \&mark_spam,
},
'mark-ham' => {function => \&mark_ham,
},
+ 'learn' => {function => \&learn,
+ },
'help' => {function => sub {pod2usage({verbose => 2});}}
);
chdir($config{spool_dir}) or die "chdir $config{spool_dir} failed: $!";
}
my $opts =
- handle_subcommand_arguments(\@ARGV,$subcommands{$subcommand}{arguments});
+ handle_subcommand_arguments(\@ARGV,
+ $subcommands{$subcommand}{arguments},
+ $subcommands{$subcommand}{defaults},
+ );
$subcommands{$subcommand}{function}->(\%options,$opts,\%config,\@ARGV);
my $body = $rec->{text};
my ($subject) = $body =~ /^Subject: *(.+)$/mi;
my $is_match = 0;
- if ($subject =~ /\Q$regex\E/) {
+ if ($subject =~ /$regex/) {
$is_match = 1;
}
- if ($mid =~ /\Q$regex\E/) {
+ if ($mid =~ /$regex/) {
$is_match = 1;
}
- if ($spam_ham eq 'spam') {
- $spam->add_spam($mid);
+ if ($is_match) {
+ print STDERR "it's a match" if $DEBUG;
+ if ($spam_ham eq 'spam') {
+ $spam->add_spam($mid);
+ } else {
+ $spam->add_ham($mid);
+ }
+ }
+ },
+ $bug_num
+ );
+ $spam->save();
+ }
+}
+
+sub learn {
+ my ($options,$opts,$config,$argv) = @_;
+ for my $bug_num (@{$argv}) {
+ my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
+ die "Unable to open bug log spam for $bug_num";
+ foreachmsg(sub {
+ my ($bn,$rec,$mid) = @_;
+ my $score;
+ if ($spam->is_spam($mid)) {
+ $score //=
+ spam_score($rec,$options->{spamc},
+ [@{$options->{spamc_opts}},
+ '-L','spam'
+ ]
+ );
+ print STDERR "learning spam" if $DEBUG;
+ } elsif ($spam->is_ham($mid)) {
+ $score //=
+ spam_score($rec,$options->{spamc},
+ [@{$options->{spamc_opts}},
+ '-L','ham'
+ ]
+ );
+ print STDERR "learning ham" if $DEBUG;
} else {
- $spam->add_ham($mid);
+ print STDERR "not learning" if $DEBUG;
}
+ print STDERR " from $mid" if $DEBUG;
},
$bug_num
);
my @bug_score =
spam_score_bug($bug_num,
$options->{spamc},
- $options->{spamc_opts});
+ $options->{spamc_opts},
+ $opts->{skip_seen},
+ );
print "$_->{score} $_->{message_id} $_->{subject}\n"
foreach @bug_score;
}
if ($is_spam) {
print STDERR "it's spam ($score)\n" if $DEBUG;
$spam->add_spam($mid);
- } elsif ($score < $options->{ham_threshold}) {
+ } elsif ($score < $opts->{ham_threshold}) {
print STDERR "it's really ham ($score)\n" if $DEBUG;
$spam->add_ham($mid);
}
}
sub spam_score_bug {
- my ($bug,$spamc,$spamc_opts) = @_;
+ my ($bug,$spamc,$spamc_opts,$skip_seen) = @_;
+ my $spam;
+ if ($skip_seen) {
+ $spam = Debbugs::Log::Spam->new(bug_num => $bug) or
+ die "Unable to open bug log spam for $bug";
+ }
my @records;
foreachmsg(sub {
my ($bn,$rec,$mid) = @_;
- my $score =
+ my $score;
+ if ($skip_seen) {
+ if ($spam->is_spam($mid)) {
+ $score = 999;
+ } elsif ($spam->is_ham($mid)) {
+ $score = -999;
+ }
+ }
+ $score //=
spam_score($rec,$spamc,$spamc_opts);
+ my ($subject) = $rec->{text} =~ /^Subject: *(.+)$/mi;
push @records,
{message_id => $mid,
score => $score,
- subject => ($rec->{text} =~ /^Subject: *(.+)/i)[0],
+ subject => $subject,
};
},
$bug
my ($score,$threshold,$report);
my $is_spam = 0;
eval {
+ $report = '';
+ $score = 0;
+ $threshold = 5;
my ($spamc_in,$spamc_out);
my $old_sig = $SIG{"PIPE"};
$SIG{"PIPE"} = sub {
};
my $childpid =
open3($spamc_in,$spamc_out,0,
- $spamc,'-E',@{$spamc_opts}) or
+ $spamc,'-E','--headers',@{$spamc_opts}) or
die "Unable to fork spamc: $!";
if (not $childpid) {
die "Unable to fork spamc";
}
- print {$spamc} $record->{text};
- close($spamc) or die "Unable to close spamc: $!";
+ print {$spamc_in} $record->{text};
+ close($spamc_in) or die "Unable to close spamc_in: $!";
waitpid($childpid,0);
+ my $exit_code = $? >> 8;
+ if ($exit_code) {
+ $is_spam = 1;
+ }
+ my $in_spam_header = 0;
+ while (<$spamc_out>) {
+ if (/^X-Spam/ or (/^\s+/ and $in_spam_header)) {
+ $in_spam_header = 1;
+ $report .= $_;
+ if (/^X-Spam-Status: (Yes|No), score=(-?[\d\.]+) required=(-?[\d\.]+)/) {
+ $threshold = $3;
+ $score = $2;
+ }
+ } else {
+ $in_spam_header = 0;
+ }
+ if (/^\s*$/) {
+ last;
+ }
+ }
if ($DEBUG) {
- print STDERR "[$?;".($? >> 8)."] ";
- print STDERR map {s/\n//; $_ } <$spamc_out>;
- print STDERR " ";
+ print STDERR "[$exit_code] [$score/$threshold]\n$report\n";
}
close($spamc_out);
$SIG{"PIPE"} = $old_sig;
- if ($? >> 8) {
- $is_spam = 1;
- }
- my ($first_line,@report) = <$spamc_out>;
- if (defined $first_line) {
- chomp $first_line;
- ($score,$threshold) = $first_line =~ m{^(-?[\d\.]+)/(-?[\d\.]+)$};
- $report = join('',@report);
- }
};
if ($@) {
carp "processing of message failed [$@]\n";
my ($msg_id) = record_regex($record,
qr/^Message-Id:\s+<(.+)>/mi);
next unless defined $msg_id;
+ print STDERR "examining $msg_id: " if $DEBUG;
if ($msg_id =~ /$config{email_domain}$/) {
- print STDERR "skipping $msg_id\n" if $DEBUG;
+ print STDERR "skipping\n" if $DEBUG;
next;
}
- print STDERR "examining $msg_id: " if $DEBUG;
if ($seen_msgids{$msg_id}) {
print STDERR "already seen\n" if $DEBUG;
next;
}
$seen_msgids{$msg_id}=1;
$sub->($bug_num,$record,$msg_id);
+ print STDERR "\n" if $DEBUG;
}
}