#! /usr/bin/perl # debbugs-spam is part of debbugs, and is released # under the terms of the GPL version 2, or any later version, at your # option. See the file README and COPYING for more information. # Copyright 2012 by Don Armstrong . use warnings; use strict; use Getopt::Long qw(:config no_ignore_case); use Pod::Usage; =head1 NAME debbugs-spam -- Scan log files for spam and populate nnn.log.spam =head1 SYNOPSIS debbugs-spam [options] bugnumber [[bugnumber2]..] Options: --spool-dir debbugs spool directory --debug, -d debugging level (Default 0) --help, -h display this help --man, -m display manual =head1 OPTIONS =over =item B<--spool-dir> Debbugs spool directory; defaults to the value configured in the debbugs configuration file. =item B<--debug, -d> Debug verbosity. =item B<--help, -h> Display brief useage information. =item B<--man, -m> Display this manual. =back =head1 SUBCOMMANDS =over =item B Automatically scan messages using spamassassin and mark messages as spam which hit the threshold, and those that are highly negative as ham. =item B Output the score of all of the messages in a bug =over =item B<--skip-seen> Skip messages which have previously been classified =back =item B Mark messages as spam if there is a regex match to subject or message id =item B Mark messages as ham if there is a regex match to subject or message id =item B Learn from messages which are ham/spam =back =head1 EXAMPLES Start spamd: /usr/sbin/spamd --socketpath=/home/debbugs/spamd_socket \ --nouser-config --cf='include /home/debbugs/.spamassassin/user_prefs' \ --cf='allow_user_rules 1' --allow-tell; Then score bugs: debbugs-spam --spamc-opts '-U' --spamc-opts '/home/debbugs/spamd_socket' \ score 859123; =cut use vars qw($DEBUG); use Debbugs::Log qw(record_regex); use Debbugs::Log::Spam; use Debbugs::Config qw(:config); use Debbugs::Command qw(:all); use IPC::Open3 qw(open3); use Carp; my %options = (debug => 0, help => 0, man => 0, verbose => 0, quiet => 0, quick => 0, spamc => 'spamc', spamc_opts => [], ); handle_main_arguments(\%options, 'quick|q', 'service|s', 'sysconfdir|c', 'spamc=s' => 0, 'spamc_opts|spamc-opts=s@' => 0, 'spool_dir|spool-dir=s', 'debug|d+','help|h|?','man|m'); my %subcommands = ('auto-scan' => {function => \&auto_spamscan, arguments => {'ham_threshold|ham-threshold=s' => 0, }, defaults => {ham_threshold => -5}, }, 'score' => {function => \&score_bug, arguments => {'skip_seen|skip-seen!' => 0 }, }, 'mark-spam' => {function => \&mark_spam, }, 'mark-ham' => {function => \&mark_ham, }, 'learn' => {function => \&learn, }, 'help' => {function => sub {pod2usage({verbose => 2});}} ); pod2usage() if $options{help}; pod2usage({verbose=>2}) if $options{man}; $DEBUG = $options{debug}; my @USAGE_ERRORS; $options{verbose} = $options{verbose} - $options{quiet}; pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS; my ($subcommand) = shift @ARGV; if (not defined $subcommand) { $subcommand = 'help'; print STDERR "You must provide a subcommand; displaying usage.\n"; pod2usage(); } elsif (not exists $subcommands{$subcommand}) { print STDERR "$subcommand is not a valid subcommand; displaying usage.\n"; pod2usage(); } if (exists $options{spool_dir} and defined $options{spool_dir}) { $config{spool_dir} = $options{spool_dir}; } if ($subcommand ne 'help') { chdir($config{spool_dir}) or die "chdir $config{spool_dir} failed: $!"; } my $opts = handle_subcommand_arguments(\@ARGV, $subcommands{$subcommand}{arguments}, $subcommands{$subcommand}{defaults}, ); $subcommands{$subcommand}{function}->(\%options,$opts,\%config,\@ARGV); sub mark_ham { mark_it('ham',@_); } sub mark_spam { mark_it('spam',@_); } sub mark_it { my ($spam_ham,$options,$opts,$config,$argv) = @_; my $regex = shift @{$argv}; for my $bug_num (@{$argv}) { my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or die "Unable to open bug log spam for $bug_num"; foreachmsg(sub { my ($bn,$rec,$mid) = @_; my $body = $rec->{text}; my ($subject) = $body =~ /^Subject: *(.+)$/mi; my $is_match = 0; if ($subject =~ /$regex/) { $is_match = 1; } if ($mid =~ /$regex/) { $is_match = 1; } if ($is_match) { print STDERR "it's a match" if $DEBUG; if ($spam_ham eq 'spam') { $spam->add_spam($mid); } else { $spam->add_ham($mid); } } }, $bug_num ); $spam->save(); } } sub learn { my ($options,$opts,$config,$argv) = @_; for my $bug_num (@{$argv}) { my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or die "Unable to open bug log spam for $bug_num"; foreachmsg(sub { my ($bn,$rec,$mid) = @_; my $score; if ($spam->is_spam($mid)) { $score //= spam_score($rec,$options->{spamc}, [@{$options->{spamc_opts}}, '-L','spam' ] ); print STDERR "learning spam" if $DEBUG; } elsif ($spam->is_ham($mid)) { $score //= spam_score($rec,$options->{spamc}, [@{$options->{spamc_opts}}, '-L','ham' ] ); print STDERR "learning ham" if $DEBUG; } else { print STDERR "not learning" if $DEBUG; } print STDERR " from $mid" if $DEBUG; }, $bug_num ); } } sub score_bug { my ($options,$opts,$config,$argv) = @_; for my $bug_num (@{$argv}) { my @bug_score = spam_score_bug($bug_num, $options->{spamc}, $options->{spamc_opts}, $opts->{skip_seen}, ); print "$_->{score} $_->{message_id} $_->{subject}\n" foreach @bug_score; } } sub auto_spamscan { my ($options,$opts,$config,$argv) = @_; for my $bug_num (@{$argv}) { my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or die "Unable to open bug log spam for $bug_num"; foreachmsg(sub { my ($bn,$rec,$mid) = @_; if ($spam->is_spam($mid)) { print STDERR "already spam\n" if $DEBUG; return; } if ($spam->is_ham($mid)) { print STDERR "already ham\n" if $DEBUG; return; } my ($score,$is_spam,$report,$threshold) = spam_score($rec, $options->{spamc}, $options->{spamc_opts}, ); if ($is_spam) { print STDERR "it's spam ($score)\n" if $DEBUG; $spam->add_spam($mid); } elsif ($score < $opts->{ham_threshold}) { print STDERR "it's really ham ($score)\n" if $DEBUG; $spam->add_ham($mid); } else { print STDERR "it's ham ($score)\n" if $DEBUG; } }, $bug_num, ); $spam->save(); } } sub spam_score_bug { my ($bug,$spamc,$spamc_opts,$skip_seen) = @_; my $spam; if ($skip_seen) { $spam = Debbugs::Log::Spam->new(bug_num => $bug) or die "Unable to open bug log spam for $bug"; } my @records; foreachmsg(sub { my ($bn,$rec,$mid) = @_; my $score; if ($skip_seen) { if ($spam->is_spam($mid)) { $score = 999; } elsif ($spam->is_ham($mid)) { $score = -999; } } $score //= spam_score($rec,$spamc,$spamc_opts); my ($subject) = $rec->{text} =~ /^Subject: *(.+)$/mi; push @records, {message_id => $mid, score => $score, subject => $subject, }; }, $bug ); return @records; } sub spam_score { my ($record,$spamc,$spamc_opts) = @_; my ($score,$threshold,$report); my $is_spam = 0; eval { $report = ''; $score = 0; $threshold = 5; my ($spamc_in,$spamc_out); my $old_sig = $SIG{"PIPE"}; $SIG{"PIPE"} = sub { die "SIGPIPE in child for some reason"; }; my $childpid = open3($spamc_in,$spamc_out,0, $spamc,'-E','--headers',@{$spamc_opts}) or die "Unable to fork spamc: $!"; if (not $childpid) { die "Unable to fork spamc"; } print {$spamc_in} $record->{text}; close($spamc_in) or die "Unable to close spamc_in: $!"; waitpid($childpid,0); my $exit_code = $? >> 8; if ($exit_code) { $is_spam = 1; } while (<$spamc_out>) { if (/^X-Spam/) { $report .= $_; if (/^X-Spam-Status: (Yes|No), score=(-?[\d\.]+) required=(-?[\d\.]+)/) { $threshold = $3; $score = $2; } } if (/^\s*$/) { last; } } if ($DEBUG) { print STDERR "[$exit_code] [$score/$threshold]\n$report\n"; } close($spamc_out); $SIG{"PIPE"} = $old_sig; }; if ($@) { carp "processing of message failed [$@]\n"; return undef; } return wantarray?($score,$is_spam,$report):$score; } sub foreachmsg { my ($sub,$bug_num) = @_; my $log = Debbugs::Log->new(bug_num => $bug_num) or die "Unable to open bug log for $bug_num"; my %seen_msgids; while (my $record = $log->read_record()) { next if $record->{type} eq 'html'; next if $record->{type} eq 'autocheck'; my ($msg_id) = record_regex($record, qr/^Message-Id:\s+<(.+)>/mi); next unless defined $msg_id; print STDERR "examining $msg_id: " if $DEBUG; if ($msg_id =~ /$config{email_domain}$/) { print STDERR "skipping\n" if $DEBUG; next; } if ($seen_msgids{$msg_id}) { print STDERR "already seen\n" if $DEBUG; next; } $seen_msgids{$msg_id}=1; $sub->($bug_num,$record,$msg_id); print STDERR "\n" if $DEBUG; } } __END__ # Local Variables: # cperl-indent-level: 4 # indent-tabs-mode: nil # End: