2 # debbugs-spam is part of debbugs, and is released
3 # under the terms of the GPL version 2, or any later version, at your
4 # option. See the file README and COPYING for more information.
5 # Copyright 2012 by Don Armstrong <don@donarmstrong.com>.
11 use Getopt::Long qw(:config no_ignore_case);
16 debbugs-spam -- Scan log files for spam and populate nnn.log.spam
20 debbugs-spam [options] bugnumber [[bugnumber2]..]
23 --spool-dir debbugs spool directory
24 --debug, -d debugging level (Default 0)
25 --help, -h display this help
26 --man, -m display manual
34 Debbugs spool directory; defaults to the value configured in the
35 debbugs configuration file.
43 Display brief useage information.
57 Automatically scan messages using spamassassin and mark messages as
58 spam which hit the threshold, and those that are highly negative as
63 Output the score of all of the messages in a bug
67 =item B<--skip-seen> Skip messages which have previously been classified
73 Mark messages as spam if there is a regex match to subject or message
78 Mark messages as ham if there is a regex match to subject or message
83 Learn from messages which are ham/spam
92 /usr/sbin/spamd --socketpath=/home/debbugs/spamd_socket \
93 --nouser-config --cf='include /home/debbugs/.spamassassin/user_prefs' \
94 --cf='allow_user_rules 1' --allow-tell;
98 debbugs-spam --spamc-opts '-U' --spamc-opts '/home/debbugs/spamd_socket' \
106 use Debbugs::Log qw(record_regex);
107 use Debbugs::Log::Spam;
108 use Debbugs::Config qw(:config);
109 use Debbugs::Command qw(:all);
110 use IPC::Open3 qw(open3);
124 handle_main_arguments(\%options,
129 'spamc_opts|spamc-opts=s@' => 0,
130 'spool_dir|spool-dir=s',
131 'debug|d+','help|h|?','man|m');
134 ('auto-scan' => {function => \&auto_spamscan,
135 arguments => {'ham_threshold|ham-threshold=s' => 0,
137 defaults => {ham_threshold => -5},
139 'score' => {function => \&score_bug,
140 arguments => {'skip_seen|skip-seen!' => 0
143 'mark-spam' => {function => \&mark_spam,
145 'mark-ham' => {function => \&mark_ham,
147 'learn' => {function => \&learn,
149 'help' => {function => sub {pod2usage({verbose => 2});}}
152 pod2usage() if $options{help};
153 pod2usage({verbose=>2}) if $options{man};
155 $DEBUG = $options{debug};
158 $options{verbose} = $options{verbose} - $options{quiet};
160 pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
162 my ($subcommand) = shift @ARGV;
163 if (not defined $subcommand) {
164 $subcommand = 'help';
165 print STDERR "You must provide a subcommand; displaying usage.\n";
167 } elsif (not exists $subcommands{$subcommand}) {
168 print STDERR "$subcommand is not a valid subcommand; displaying usage.\n";
172 if (exists $options{spool_dir} and defined $options{spool_dir}) {
173 $config{spool_dir} = $options{spool_dir};
175 if ($subcommand ne 'help') {
176 chdir($config{spool_dir}) or die "chdir $config{spool_dir} failed: $!";
179 handle_subcommand_arguments(\@ARGV,
180 $subcommands{$subcommand}{arguments},
181 $subcommands{$subcommand}{defaults},
183 $subcommands{$subcommand}{function}->(\%options,$opts,\%config,\@ARGV);
195 my ($spam_ham,$options,$opts,$config,$argv) = @_;
196 my $regex = shift @{$argv};
197 for my $bug_num (@{$argv}) {
198 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
199 die "Unable to open bug log spam for $bug_num";
201 my ($bn,$rec,$mid) = @_;
202 my $body = $rec->{text};
203 my ($subject) = $body =~ /^Subject: *(.+)$/mi;
205 if ($subject =~ /$regex/) {
208 if ($mid =~ /$regex/) {
212 print STDERR "it's a match" if $DEBUG;
213 if ($spam_ham eq 'spam') {
214 $spam->add_spam($mid);
216 $spam->add_ham($mid);
227 my ($options,$opts,$config,$argv) = @_;
228 for my $bug_num (@{$argv}) {
229 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
230 die "Unable to open bug log spam for $bug_num";
232 my ($bn,$rec,$mid) = @_;
234 if ($spam->is_spam($mid)) {
236 spam_score($rec,$options->{spamc},
237 [@{$options->{spamc_opts}},
241 print STDERR "learning spam" if $DEBUG;
242 } elsif ($spam->is_ham($mid)) {
244 spam_score($rec,$options->{spamc},
245 [@{$options->{spamc_opts}},
249 print STDERR "learning ham" if $DEBUG;
251 print STDERR "not learning" if $DEBUG;
253 print STDERR " from $mid" if $DEBUG;
262 my ($options,$opts,$config,$argv) = @_;
263 for my $bug_num (@{$argv}) {
265 spam_score_bug($bug_num,
267 $options->{spamc_opts},
270 print "$_->{score} $_->{message_id} $_->{subject}\n"
276 my ($options,$opts,$config,$argv) = @_;
278 for my $bug_num (@{$argv}) {
279 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
280 die "Unable to open bug log spam for $bug_num";
282 my ($bn,$rec,$mid) = @_;
283 if ($spam->is_spam($mid)) {
284 print STDERR "already spam\n" if $DEBUG;
287 if ($spam->is_ham($mid)) {
288 print STDERR "already ham\n" if $DEBUG;
291 my ($score,$is_spam,$report,$threshold) =
294 $options->{spamc_opts},
297 print STDERR "it's spam ($score)\n" if $DEBUG;
298 $spam->add_spam($mid);
299 } elsif ($score < $opts->{ham_threshold}) {
300 print STDERR "it's really ham ($score)\n" if $DEBUG;
301 $spam->add_ham($mid);
304 print STDERR "it's ham ($score)\n" if $DEBUG;
314 my ($bug,$spamc,$spamc_opts,$skip_seen) = @_;
318 $spam = Debbugs::Log::Spam->new(bug_num => $bug) or
319 die "Unable to open bug log spam for $bug";
323 my ($bn,$rec,$mid) = @_;
326 if ($spam->is_spam($mid)) {
328 } elsif ($spam->is_ham($mid)) {
333 spam_score($rec,$spamc,$spamc_opts);
334 my ($subject) = $rec->{text} =~ /^Subject: *(.+)$/mi;
347 my ($record,$spamc,$spamc_opts) = @_;
348 my ($score,$threshold,$report);
354 my ($spamc_in,$spamc_out);
355 my $old_sig = $SIG{"PIPE"};
357 die "SIGPIPE in child for some reason";
360 open3($spamc_in,$spamc_out,0,
361 $spamc,'-E','--headers',@{$spamc_opts}) or
362 die "Unable to fork spamc: $!";
364 die "Unable to fork spamc";
366 print {$spamc_in} $record->{text};
367 close($spamc_in) or die "Unable to close spamc_in: $!";
368 waitpid($childpid,0);
369 my $exit_code = $? >> 8;
373 while (<$spamc_out>) {
376 if (/^X-Spam-Status: (Yes|No), score=(-?[\d\.]+) required=(-?[\d\.]+)/) {
386 print STDERR "[$exit_code] [$score/$threshold]\n$report\n";
389 $SIG{"PIPE"} = $old_sig;
392 carp "processing of message failed [$@]\n";
395 return wantarray?($score,$is_spam,$report):$score;
399 my ($sub,$bug_num) = @_;
400 my $log = Debbugs::Log->new(bug_num => $bug_num) or
401 die "Unable to open bug log for $bug_num";
403 while (my $record = $log->read_record()) {
404 next if $record->{type} eq 'html';
405 next if $record->{type} eq 'autocheck';
406 my ($msg_id) = record_regex($record,
407 qr/^Message-Id:\s+<(.+)>/mi);
408 next unless defined $msg_id;
409 print STDERR "examining $msg_id: " if $DEBUG;
410 if ($msg_id =~ /$config{email_domain}$/) {
411 print STDERR "skipping\n" if $DEBUG;
414 if ($seen_msgids{$msg_id}) {
415 print STDERR "already seen\n" if $DEBUG;
418 $seen_msgids{$msg_id}=1;
419 $sub->($bug_num,$record,$msg_id);
420 print STDERR "\n" if $DEBUG;
428 # cperl-indent-level: 4
429 # indent-tabs-mode: nil