From bf594645dcc87ce277ecd12df13271f80c6e6cbf Mon Sep 17 00:00:00 2001 From: cjwatson <> Date: Tue, 13 Jan 2004 18:55:53 -0800 Subject: [PATCH] [project @ 2004-01-13 18:55:53 by cjwatson] New spamscan script, to let us run SpamAssassin concurrently with procmail rather than from procmail on bugs.debian.org. I'm hoping that this will alleviate most of our mail queueing problems. This doesn't require a separate spamd, but uses the SA Perl modules itself; this way we can use more interesting user rules (spamd forbids anything that might be able to evaluate arbitrary Perl). There's no upgrade documentation yet, but unless $gSpamScan is set in /etc/debbugs/config the whole thing is a no-op, so that's OK. --- debian/control | 1 + debian/crontab | 1 + scripts/receive.in | 10 +++- scripts/spamscan.in | 139 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 149 insertions(+), 2 deletions(-) create mode 100755 scripts/spamscan.in diff --git a/debian/control b/debian/control index c86341d8..b3d321b8 100644 --- a/debian/control +++ b/debian/control @@ -10,6 +10,7 @@ Package: debbugs Architecture: all Depends: perl5 | perl, exim | mail-transport-agent, libmailtools-perl, ed, libmime-perl, libio-stringy-perl, libmldbm-perl Recommends: httpd, links | lynx +Suggests: spamassassin Description: The bug tracking system based on the active Debian BTS Debian has a bug tracking system which files details of bugs reported by users and developers. Each bug is given a number, and is kept on file until diff --git a/debian/crontab b/debian/crontab index 4389696d..cd5d7b88 100644 --- a/debian/crontab +++ b/debian/crontab @@ -1,6 +1,7 @@ # Example crontab for bugs system. Install this as a user crontab for the # user who owns /var/lib/debbugs/spool (for example, debbugs). 
MAILTO=owner@bugs.example.org +* * * * * /usr/lib/debbugs/spamscan >/dev/null 3,18,33,48 * * * * /usr/lib/debbugs/processall >/dev/null 7,22,37,52 * * * * /usr/lib/debbugs/rebuild db-h 9,39 * * * * /usr/lib/debbugs/html-control >/dev/null diff --git a/scripts/receive.in b/scripts/receive.in index fdede3da..899ccd2e 100755 --- a/scripts/receive.in +++ b/scripts/receive.in @@ -1,5 +1,5 @@ #!/usr/bin/perl -# $Id: receive.in,v 1.15 2003/01/28 23:52:08 cjwatson Exp $ +# $Id: receive.in,v 1.16 2004/01/13 18:55:53 cjwatson Exp $ # usage: mail is piped directly into program #set umask in order to have group-writable incoming/* @@ -121,7 +121,13 @@ printf(FILE "Received: (at %s) by $gEmailDomain; %d %s %d %02d:%02d:%02d +0000\n while(<STDIN>) { print(FILE) || &failure("write temporary file: $!"); } close(FILE) || &failure("close temporary file: $!"); -rename("T.$id","I$queue.$id") || &failure("rename spool message: $!"); +my $prefix; +if ($gSpamScan) { + $prefix = 'S'; +} else { + $prefix = 'I'; +} +rename("T.$id","$prefix$queue.$id") || &failure("rename spool message: $!"); exit(0); diff --git a/scripts/spamscan.in b/scripts/spamscan.in new file mode 100755 index 00000000..25b7a3cb --- /dev/null +++ b/scripts/spamscan.in @@ -0,0 +1,139 @@ +#! /usr/bin/perl +# $Id: spamscan.in,v 1.1 2004/01/13 18:55:53 cjwatson Exp $ +# +# Usage: spamscan +# +# Performs SpamAssassin checks on a message before allowing it through to +# the main incoming queue. +# +# Uses up: incoming/S.nn +# Temps: incoming/R.nn +# Creates: incoming/I.nn +# Stop: spamscan-stop + +$config_path = '/etc/debbugs'; +$lib_path = '/usr/lib/debbugs'; + +require "$config_path/config"; +require "$lib_path/errorlib"; +$ENV{PATH} = $lib_path . ':' . 
$ENV{PATH}; + +chdir $gSpoolDir or die "chdir spool: $!\n"; +push @INC, $lib_path; + +use Mail::SpamAssassin; +use Mail::SpamAssassin::NoMailAudit; + +umask 002; + +my $user_prefs = "$ENV{HOME}/.spamassassin/user_prefs"; +my $user_prefs_time; +if (-e $user_prefs) { + $user_prefs_time = (stat $user_prefs)[9]; +} + +my $spam = Mail::SpamAssassin->new({ + dont_copy_prefs => 1, + site_rules_filename => $gSpamRulesDir, + userprefs_filename => $user_prefs, + local_tests_only => ($gSpamLocalTestsOnly || 0), + debug => ($ENV{DEBBUGS_SPAM_DEBUG} || 0), +}); +$spam->compile_now(1); # use all user preferences + +$| = 1; + +my @ids; +my %fudged; + +sub header_or_empty ($$) { + my ($mail, $hdr) = @_; + my $value = $mail->get_header($hdr); + if (defined $value) { + chomp $value; + return $value; + } + return ''; +} + +&filelock('incoming-spamscan'); +for (;;) { + if (-f 'spamscan-stop') { + print STDERR "spamscan-stop file created\n"; + last; + } + if (-e $user_prefs) { + if ($user_prefs_time != (stat $user_prefs)[9]) { + # stop and wait to be re-invoked from cron + last; + } + } + + if (!@ids) { + opendir DIR, 'incoming' or die "opendir incoming: $!"; + while (defined($_ = readdir DIR)) { + push @ids, $_ if s/^S//; + } + last unless @ids; + @ids = sort @ids; + } + + my $nf = @ids; + my $id = shift @ids; + unless (rename "incoming/S$id", "incoming/R$id") { + if ($fudged{$id}) { + die "$id already fudged once! $!\n"; + } + $fudged{$id} = 1; + next; + } + + print "[$nf] $id scanning ...\n" or die "print log: $!"; + + open MESSAGE, "< incoming/R$id" or die "open incoming/R$id: $!"; + my @textarray; + # Kludge to work around Received: then From_ weirdness in receive; + # remove when receive is fixed? We may continue to need it for + # reprocessing old messages. 
+ $textarray[0] = <MESSAGE>; + if ($textarray[0] =~ /^Received:/) { + my $maybefrom = <MESSAGE>; + if ($maybefrom =~ /^From /) { + $textarray[1] = $textarray[0]; + $textarray[0] = $maybefrom; + } else { + $textarray[1] = $maybefrom; + } + } + push @textarray, <MESSAGE>; + close MESSAGE; + my $mail = Mail::SpamAssassin::NoMailAudit->new(data => \@textarray); + $mail->{noexit} = 1; + + print " From: ", header_or_empty($mail, 'From'), "\n"; + print " Subject: ", header_or_empty($mail, 'Subject'), "\n"; + print " Message-Id: ", header_or_empty($mail, 'Message-Id'), "\n"; + my $status = $spam->check($mail); + $status->rewrite_mail(); + + if ($status->is_spam()) { + $mail->accept($gSpamMailbox); + unlink "incoming/R$id" or warn "unlink incoming/R$id: $!"; + printf " spam %.1f/%.1f\n", + $status->get_hits(), $status->get_required_hits() + or die "printf log: $!"; + } else { + open OUT, "> incoming/I$id" or die "open incoming/I$id: $!"; + print OUT $mail->as_string() or die "print incoming/I$id: $!"; + close OUT or die "close incoming/I$id: $!"; + unlink "incoming/R$id" or warn "unlink incoming/R$id: $!"; + printf " ok %.1f/%.1f\n", + $status->get_hits(), $status->get_required_hits() + or die "printf log: $!"; + } + + $status->finish(); +} +&unfilelock; + +exit 0; -- 2.39.5