2 # debbugs-spam is part of debbugs, and is released
3 # under the terms of the GPL version 2, or any later version, at your
4 # option. See the file README and COPYING for more information.
5 # Copyright 2012 by Don Armstrong <don@donarmstrong.com>.
11 use Getopt::Long qw(:config no_ignore_case);
16 debbugs-spam -- Scan log files for spam and populate nnn.log.spam
20 debbugs-spam [options] bugnumber [[bugnumber2]..]
23 --spool-dir debbugs spool directory
24 --debug, -d debugging level (Default 0)
25 --help, -h display this help
26 --man, -m display manual
34 Debbugs spool directory; defaults to the value configured in the
35 debbugs configuration file.
43 Display brief useage information.
57 Automatically scan messages using spamassassin and mark messages as
58 spam which hit the threshold, and those that are highly negative as
63 Output the score of all of the messages in a bug
67 =item B<--skip-seen> Skip messages which have previously been classified
73 Mark messages as spam if there is a regex match to subject or message
78 Mark messages as ham if there is a regex match to subject or message
83 Learn from messages which have been marked as spam/ham
96 use Debbugs::Log qw(record_regex);
97 use Debbugs::Log::Spam;
98 use Debbugs::Config qw(:config);
99 use Debbugs::Command qw(:all);
100 use Debbugs::Common qw(getbuglocation);
101 use IPC::Open3 qw(open3);
114 handle_main_arguments(\%options,
118 'spamc_opts|spamc-opts=s@' => 0,
119 'spool_dir|spool-dir=s',
122 'debug|d+','help|h|?','man|m');
125 ('auto-scan' => {function => \&auto_spamscan,
126 arguments => {'ham_threshold|ham-threshold=s' => 0,
127 'skip_missing|skip-missing!' => 0,
129 defaults => {ham_threshold => -5,
133 'score' => {function => \&score_bug,
134 arguments => {'skip_seen|skip-seen!' => 0
137 'mark-spam' => {function => \&mark_spam,
139 'mark-ham' => {function => \&mark_ham,
141 'learn' => {function => \&learn,
142 arguments => {'skip_missing|skip-missing!' => 0,
144 defaults => {skip_missing => 0,
147 'help' => {function => sub {pod2usage({verbose => 2});}}
150 pod2usage() if $options{help};
151 pod2usage({verbose=>2}) if $options{man};
153 $DEBUG = $options{debug};
156 $options{verbose} = $options{verbose} - $options{quiet};
158 pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
160 my ($subcommand) = shift @ARGV;
161 if (not defined $subcommand) {
162 $subcommand = 'help';
163 print STDERR "You must provide a subcommand; displaying usage.\n";
165 } elsif (not exists $subcommands{$subcommand}) {
166 print STDERR "$subcommand is not a valid subcommand; displaying usage.\n";
170 if (exists $options{spool_dir} and defined $options{spool_dir}) {
171 $config{spool_dir} = $options{spool_dir};
173 if ($subcommand ne 'help') {
174 chdir($config{spool_dir}) or die "chdir $config{spool_dir} failed: $!";
177 handle_subcommand_arguments(\@ARGV,
178 $subcommands{$subcommand}{arguments},
179 $subcommands{$subcommand}{defaults},
181 $subcommands{$subcommand}{function}->(\%options,$opts,\%config,\@ARGV);
193 my ($spam_ham,$options,$opts,$config,$argv) = @_;
194 my $regex = shift @{$argv};
195 for my $bug_num (@{$argv}) {
196 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
197 die "Unable to open bug log spam for $bug_num";
199 my ($bn,$rec,$mid) = @_;
200 my $body = $rec->{text};
201 my ($subject) = $body =~ /^Subject: *(.+)$/mi;
203 if ($subject =~ /\Q$regex\E/) {
206 if ($mid =~ /\Q$regex\E/) {
210 print STDERR "it's a match" if $DEBUG;
211 if ($spam_ham eq 'spam') {
212 $spam->add_spam($mid);
214 $spam->add_ham($mid);
226 my ($options,$opts,$config,$argv) = @_;
227 for my $bug_num (@{$argv}) {
229 spam_score_bug($bug_num,
231 $options->{spamc_opts},
234 print "$_->{score} $_->{message_id} $_->{subject}\n"
240 my ($options,$opts,$config,$argv) = @_;
242 for my $bug_num (@{$argv}) {
243 if ($opts->{skip_missing} and
244 not defined getbuglocation($bug_num,'log')) {
245 print STDERR "bug $bug_num does not exist\n" if $options->{verbose} > -1;
248 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
249 die "Unable to open bug log spam for $bug_num";
251 my ($bn,$rec,$mid) = @_;
252 if ($spam->is_spam($mid)) {
253 spamc_learn_spam($rec,$options->{spamc},$options->{spamc_opts});
256 if ($spam->is_ham($mid)) {
257 spamc_learn_ham($rec,$options->{spamc},$options->{spamc_opts});
268 my ($options,$opts,$config,$argv) = @_;
270 for my $bug_num (@{$argv}) {
271 if ($opts->{skip_missing} and
272 not defined getbuglocation($bug_num,'log')) {
273 print STDERR "bug $bug_num does not exist\n" if $options->{verbose} > -1;
276 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
277 die "Unable to open bug log spam for $bug_num";
279 my ($bn,$rec,$mid) = @_;
280 if ($spam->is_spam($mid)) {
281 print STDERR "already spam\n" if $DEBUG;
284 if ($spam->is_ham($mid)) {
285 print STDERR "already ham\n" if $DEBUG;
288 my ($score,$is_spam,$report,$threshold) =
291 $options->{spamc_opts},
294 print STDERR "it's spam ($score)\n" if $DEBUG;
295 $spam->add_spam($mid);
296 } elsif ($score < $opts->{ham_threshold}) {
297 print STDERR "it's really ham ($score)\n" if $DEBUG;
298 $spam->add_ham($mid);
301 print STDERR "it's ham ($score)\n" if $DEBUG;
311 my ($bug,$spamc,$spamc_opts,$skip_seen) = @_;
315 $spam = Debbugs::Log::Spam->new(bug_num => $bug) or
316 die "Unable to open bug log spam for $bug";
320 my ($bn,$rec,$mid) = @_;
323 if ($spam->is_spam($mid)) {
325 } elsif ($spam->is_ham($mid)) {
330 spam_score($rec,$spamc,$spamc_opts);
331 my ($subject) = $rec->{text} =~ /^Subject: *(.+)$/mi;
343 sub add_return_path {
345 ## debbugs is kind of odd, and puts "Received:" first, them "From
346 ## ", and doesn't have a Return-Path. Fix that up so spamassassin
349 s{^(Received: at \S+\ by \S+;[^\n]+\n)(From (\S+) [^\n]+\n)}
350 {${1}Return-Path: $3\n$2};
355 my ($record,$spamc,$spamc_opts) = @_;
360 my ($spamc_in,$spamc_out);
361 my $old_sig = $SIG{"PIPE"};
363 die "SIGPIPE in child for some reason";
366 open3($spamc_in,$spamc_out,0,
367 $spamc,@{$spamc_opts}) or
368 die "Unable to fork spamc: $!";
370 die "Unable to fork spamc";
372 print STDERR add_return_path($record->{text}) if $DEBUG > 1;
373 print {$spamc_in} add_return_path($record->{text});
374 close($spamc_in) or die "Unable to close spamc_in: $!";
375 waitpid($childpid,0);
377 $exit_code = $? >> 8;
380 $report = <$spamc_out>;
382 $SIG{"PIPE"} = $old_sig;
385 carp "processing of message failed [$@]\n";
388 return ($exit_code,$report);
392 my ($record,$spamc,$spamc_opts) = @_;
393 my ($score,$threshold,$report,$exit_code);
394 ($exit_code,$report) =
395 spamc_bug($record,$spamc,[@{$spamc_opts},'-c']);
396 if (defined $report) {
397 ($score,$threshold) = $report =~ s{^(-?[\d\.]+)/(-?[\d\.]+)\n?}{};
399 return wantarray?($score,$exit_code,$report):$score;
402 sub spamc_learn_ham {
403 spamc_learn('ham',@_);
406 sub spamc_learn_forget {
407 spamc_learn('forget',@_);
410 sub spamc_learn_spam {
411 spamc_learn('spam',@_);
415 my ($type,$record,$spamc,$spamc_opts) = @_;
416 spamc_bug($record,$spamc,[@{$spamc_opts},'-L',$type])
420 my ($sub,$bug_num) = @_;
421 my $log = Debbugs::Log->new(bug_num => $bug_num) or
422 die "Unable to open bug log for $bug_num";
424 while (my $record = $log->read_record()) {
425 next unless $record->{type} eq 'incoming-recv';
426 my ($msg_id) = record_regex($record,
427 qr/^Message-Id:\s+<(.+)>/mi);
428 next unless defined $msg_id;
429 print STDERR "examining $msg_id: " if $DEBUG;
430 if ($msg_id =~ /$config{email_domain}$/) {
431 print STDERR "skipping\n" if $DEBUG;
434 if ($seen_msgids{$msg_id}) {
435 print STDERR "already seen\n" if $DEBUG;
438 $seen_msgids{$msg_id}=1;
439 $sub->($bug_num,$record,$msg_id);
440 print STDERR "\n" if $DEBUG;
448 # cperl-indent-level: 4
449 # indent-tabs-mode: nil