-#! /usr/bin/perl
-# $Id: spamscan.in,v 1.1 2004/01/13 18:55:53 cjwatson Exp $
+#! /usr/bin/perl -T
+# $Id: spamscan.in,v 1.10 2005/07/22 21:37:31 don Exp $
#
# Usage: spamscan
#
require "$lib_path/errorlib";
$ENV{PATH} = $lib_path . ':' . $ENV{PATH};
+exit unless $gSpamScan;
+
chdir $gSpoolDir or die "chdir spool: $!\n";
push @INC, $lib_path;
use Mail::SpamAssassin;
-use Mail::SpamAssassin::NoMailAudit;
+
+use lib '/usr/lib/debbugs';
+use Mail::CrossAssassin;
umask 002;
+# Take the 'incoming-spamscan' lock before touching the spool.  If filelock
+# dies for any reason the error is swallowed and the script exits quietly
+# (presumably because another spamscan run already holds the lock --
+# TODO confirm against filelock in errorlib).
+eval {
+ &filelock('incoming-spamscan');
+};
+exit if $@;
+
+ca_init('\b\d{3,8}(?:-(?:close|done|forwarded|maintonly|submitter|quiet))?\@bugs\.debian\.org', '/org/bugs.debian.org/CrossAssassinDb');
+
+my %spamseen = ();
+
my $user_prefs = "$ENV{HOME}/.spamassassin/user_prefs";
my $user_prefs_time;
if (-e $user_prefs) {
site_rules_filename => $gSpamRulesDir,
userprefs_filename => $user_prefs,
local_tests_only => ($gSpamLocalTestsOnly || 0),
- debug => ($ENV{DEBBUGS_SPAM_DEBUG} || 0),
+# debug => ($ENV{DEBBUGS_SPAM_DEBUG} || 0),
+# check_mx_delay => 2, # bit of a hack until we have parallelization
});
$spam->compile_now(1); # use all user preferences
return '';
}
-&filelock('incoming-spamscan');
for (;;) {
if (-f 'spamscan-stop') {
- print STDERR "spamscan-stop file created\n";
+ print "spamscan-stop file created\n";
last;
}
if (-e $user_prefs) {
if (!@ids) {
opendir DIR, 'incoming' or die "opendir incoming: $!";
while (defined($_ = readdir DIR)) {
- push @ids, $_ if s/^S//;
+ push @ids, $1 if /^S(.*)/;
}
last unless @ids;
@ids = sort @ids;
}
push @textarray, <MESSAGE>;
close MESSAGE;
- my $mail = Mail::SpamAssassin::NoMailAudit->new(data => \@textarray);
- $mail->{noexit} = 1;
+ my $mail = $spam->parse(\@textarray);
+ my $messageid = header_or_empty($mail, 'Message-Id');
print " From: ", header_or_empty($mail, 'From'), "\n";
print " Subject: ", header_or_empty($mail, 'Subject'), "\n";
- print " Message-Id: ", header_or_empty($mail, 'Message-Id'), "\n";
- my $status = $spam->check($mail);
- $status->rewrite_mail();
-
- if ($status->is_spam()) {
- $mail->accept($gSpamMailbox);
+ print " Date: ", header_or_empty($mail, 'Date'), "\n";
+ print " Message-Id: $messageid\n";
+ my $ca_score = ca_set(ca_keys($mail->get_body));
+ if (exists $spamseen{$messageid}) {
+ # XXX THIS DOES NOT DO LOCKING
+ open OUT, ">> $gSpamMailbox" or die "open $gSpamMailbox failed: $!";
+ print OUT $mail->get_pristine or die "print $gSpamMailbox failed: $!";
+ close OUT or die "close $gSpamMailbox failed: $!";
unlink "incoming/R$id" or warn "unlink incoming/R$id: $!";
- printf " spam %.1f/%.1f\n",
- $status->get_hits(), $status->get_required_hits()
+ print " spam $spamseen{$messageid} duplicate\n"
or die "printf log: $!";
} else {
- open OUT, "> incoming/I$id" or die "open incoming/I$id: $!";
- print OUT $mail->as_string() or die "print incoming/I$id: $!";
- close OUT or die "close incoming/I$id: $!";
- unlink "incoming/R$id" or warn "unlink incoming/R$id: $!";
- printf " ok %.1f/%.1f\n",
- $status->get_hits(), $status->get_required_hits()
- or die "printf log: $!";
- }
+ my $status = $spam->check($mail);
+ my $munged_mail = $status->rewrite_mail();
+
+ # Route the scanned message according to the SpamAssassin verdict and the
+ # CrossAssassin score: append it to the spam mailbox, append it to the
+ # cross-post mailbox, or write it to incoming/I$id.  In every case the
+ # raw incoming/R$id file is removed afterwards.  The logged score has the
+ # form "<SA score>/<SA required score> <CrossAssassin score>".
+ if ($status->is_spam()) {
+     # XXX THIS DOES NOT DO LOCKING
+     open OUT, ">> $gSpamMailbox" or die "open $gSpamMailbox failed: $!";
+     print OUT $munged_mail or die "print $gSpamMailbox failed: $!";
+     close OUT or die "close $gSpamMailbox failed: $!";
+     unlink "incoming/R$id" or warn "unlink incoming/R$id: $!";
+     my $score = sprintf "%.1f/%.1f %d",
+         $status->get_score(), $status->get_required_score(), $ca_score;
+     print " spam $score\n" or die "print log: $!";
+     # Only remember non-empty Message-Ids.  header_or_empty presumably
+     # yields '' when the header is missing (confirm against its
+     # definition); recording that would make every later message without
+     # a Message-Id look like a duplicate of this spam.
+     $spamseen{$messageid} = $score
+         if defined $messageid && length $messageid;
+ } elsif ($status->get_score() > 0 && $ca_score >= 4) {
+     # Not spam by SpamAssassin's threshold, but it scores above zero and
+     # CrossAssassin has a score of at least 4 for this body.
+     # XXX THIS DOES NOT DO LOCKING
+     open OUT, ">> $gCrossMailbox" or die "open $gCrossMailbox failed: $!";
+     print OUT $munged_mail or die "print $gCrossMailbox failed: $!";
+     close OUT or die "close $gCrossMailbox failed: $!";
+     unlink "incoming/R$id" or warn "unlink incoming/R$id: $!";
+     my $score = sprintf "%.1f/%.1f %d",
+         $status->get_score(), $status->get_required_score(), $ca_score;
+     # Plain print: $score is data, not a printf format string.
+     print " spam $score\n" or die "print log: $!";
+     # Same empty-Message-Id guard as in the spam branch above.
+     $spamseen{$messageid} = $score
+         if defined $messageid && length $messageid;
+ } else {
+     open OUT, "> incoming/I$id" or die "open incoming/I$id: $!";
+     # Peel off the first two lines of the rewritten mail, then split the
+     # remainder into header block and body at the first blank line.
+     my ($received,$from,$rest_of_message) = split /\n/, $munged_mail, 3;
+     my ($headers,$body) = split /\n\n/, $rest_of_message, 2;
+     # If the mbox-style "From " line came first, swap the two lines so
+     # that it is written second.
+     if ($received =~ /^From /) {
+         ($received,$from) = ($from,$received);
+     }
+     print OUT map { "$_\n"} ($received,$from,$headers) or die "print incoming/I$id: $!";
+     # Append the CrossAssassin score as a final header when it exceeds 1.
+     if ($ca_score > 1) {
+         print OUT "X-CrossAssassin-Score: $ca_score\n"
+             or die "print incoming/I$id: $!";
+     }
+     print OUT "\n" or die "print incoming/I$id: $!";
+     print OUT $body or die "print incoming/I$id: $!";
+     close OUT or die "close incoming/I$id: $!";
+     unlink "incoming/R$id" or warn "unlink incoming/R$id: $!";
+     printf " ok %.1f/%.1f %d\n",
+         $status->get_score(), $status->get_required_score(), $ca_score
+         or die "printf log: $!";
+ }
- $status->finish();
+ $status->finish();
+ }
+ $mail->finish;
}
&unfilelock;