2 # debbugs-spam is part of debbugs, and is released
3 # under the terms of the GPL version 2, or any later version, at your
4 # option. See the file README and COPYING for more information.
5 # Copyright 2012 by Don Armstrong <don@donarmstrong.com>.
11 use Getopt::Long qw(:config no_ignore_case);
16 debbugs-spam -- Scan log files for spam and populate nnn.log.spam
20 debbugs-spam [options] bugnumber [[bugnumber2]..]
23 --spool-dir debbugs spool directory
24 --debug, -d debugging level (Default 0)
25 --help, -h display this help
26 --man, -m display manual
34 Debbugs spool directory; defaults to the value configured in the
35 debbugs configuration file.
43 Display brief useage information.
57 Automatically scan messages using spamassassin and mark messages as
58 spam which hit the threshold, and those that are highly negative as
63 Output the score of all of the messages in a bug
67 =item B<--skip-seen> Skip messages which have previously been classified
73 Mark messages as spam if there is a regex match to subject or message
78 Mark messages as ham if there is a regex match to subject or message
83 Learn from messages which have been marked as spam/ham
96 use Debbugs::Log qw(record_regex);
97 use Debbugs::Log::Spam;
98 use Debbugs::Config qw(:config);
99 use Debbugs::Command qw(:all);
100 use Debbugs::Common qw(getbuglocation);
101 use IPC::Open3 qw(open3);
114 handle_main_arguments(\%options,
118 'spamc_opts|spamc-opts=s@' => 0,
119 'spool_dir|spool-dir=s',
122 'debug|d+','help|h|?','man|m');
125 ('auto-scan' => {function => \&auto_spamscan,
126 arguments => {'ham_threshold|ham-threshold=s' => 0,
127 'skip_missing|skip-missing!' => 0,
129 defaults => {ham_threshold => -5,
133 'score' => {function => \&score_bug,
134 arguments => {'skip_seen|skip-seen!' => 0
137 'mark-spam' => {function => \&mark_spam,
139 'mark-ham' => {function => \&mark_ham,
141 'learn' => {function => \&learn,
142 arguments => {'skip_missing|skip-missing!' => 0,
144 defaults => {skip_missing => 0,
147 'help' => {function => sub {pod2usage({verbose => 2});}}
150 pod2usage() if $options{help};
151 pod2usage({verbose=>2}) if $options{man};
153 $DEBUG = $options{debug};
156 $options{verbose} = $options{verbose} - $options{quiet};
158 pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
160 my ($subcommand) = shift @ARGV;
161 if (not defined $subcommand) {
162 $subcommand = 'help';
163 print STDERR "You must provide a subcommand; displaying usage.\n";
165 } elsif (not exists $subcommands{$subcommand}) {
166 print STDERR "$subcommand is not a valid subcommand; displaying usage.\n";
170 if (exists $options{spool_dir} and defined $options{spool_dir}) {
171 $config{spool_dir} = $options{spool_dir};
173 if ($subcommand ne 'help') {
174 chdir($config{spool_dir}) or die "chdir $config{spool_dir} failed: $!";
177 handle_subcommand_arguments(\@ARGV,
178 $subcommands{$subcommand}{arguments},
179 $subcommands{$subcommand}{defaults},
181 $subcommands{$subcommand}{function}->(\%options,$opts,\%config,\@ARGV);
193 my ($spam_ham,$options,$opts,$config,$argv) = @_;
194 my $regex = shift @{$argv};
195 for my $bug_num (@{$argv}) {
196 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
197 die "Unable to open bug log spam for $bug_num";
199 my ($bn,$rec,$mid) = @_;
200 my $body = $rec->{text};
201 my ($subject) = $body =~ /^Subject: *(.+)$/mi;
203 if ($subject =~ /\Q$regex\E/) {
206 if ($mid =~ /\Q$regex\E/) {
210 print STDERR "it's a match" if $DEBUG;
211 if ($spam_ham eq 'spam') {
212 $spam->add_spam($mid);
214 $spam->add_ham($mid);
226 my ($options,$opts,$config,$argv) = @_;
227 for my $bug_num (@{$argv}) {
229 spam_score_bug($bug_num,
231 $options->{spamc_opts},
234 print "$_->{score} $_->{message_id} $_->{subject}\n"
240 my ($options,$opts,$config,$argv) = @_;
242 for my $bug_num (@{$argv}) {
243 if ($opts->{skip_missing} and
244 not defined getbuglocation($bug_num,'log')) {
245 print STDERR "bug $bug_num does not exist\n" if $options->{verbose} > -1;
248 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
249 die "Unable to open bug log spam for $bug_num";
251 my ($bn,$rec,$mid) = @_;
252 if ($spam->is_spam($mid)) {
253 spamc_learn_spam($rec,$options->{spamc},$options->{spamc_opts});
256 if ($spam->is_ham($mid)) {
257 spamc_learn_ham($rec,$options->{spamc},$options->{spamc_opts});
268 my ($options,$opts,$config,$argv) = @_;
270 for my $bug_num (@{$argv}) {
271 if ($opts->{skip_missing} and
272 not defined getbuglocation($bug_num,'log')) {
273 print STDERR "bug $bug_num does not exist\n" if $options->{verbose} > -1;
276 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
277 die "Unable to open bug log spam for $bug_num";
279 my ($bn,$rec,$mid) = @_;
280 if ($spam->is_spam($mid)) {
281 print STDERR "already spam\n" if $DEBUG;
284 if ($spam->is_ham($mid)) {
285 print STDERR "already ham\n" if $DEBUG;
288 my ($score,$is_spam,$report,$threshold) =
291 $options->{spamc_opts},
294 print STDERR "it's spam ($score)\n" if $DEBUG;
295 $spam->add_spam($mid);
296 } elsif ($score < $opts->{ham_threshold}) {
297 print STDERR "it's really ham ($score)\n" if $DEBUG;
298 $spam->add_ham($mid);
301 print STDERR "it's ham ($score)\n" if $DEBUG;
311 my ($bug,$spamc,$spamc_opts,$skip_seen) = @_;
315 $spam = Debbugs::Log::Spam->new(bug_num => $bug) or
316 die "Unable to open bug log spam for $bug";
320 my ($bn,$rec,$mid) = @_;
323 if ($spam->is_spam($mid)) {
325 } elsif ($spam->is_ham($mid)) {
330 spam_score($rec,$spamc,$spamc_opts);
331 my ($subject) = $rec->{text} =~ /^Subject: *(.+)$/mi;
344 my ($record,$spamc,$spamc_opts) = @_;
349 my ($spamc_in,$spamc_out);
350 my $old_sig = $SIG{"PIPE"};
352 die "SIGPIPE in child for some reason";
355 open3($spamc_in,$spamc_out,0,
356 $spamc,@{$spamc_opts}) or
357 die "Unable to fork spamc: $!";
359 die "Unable to fork spamc";
361 print {$spamc_in} $record->{text};
362 close($spamc_in) or die "Unable to close spamc_in: $!";
363 waitpid($childpid,0);
365 $exit_code = $? >> 8;
368 $report = <$spamc_out>;
370 $SIG{"PIPE"} = $old_sig;
373 carp "processing of message failed [$@]\n";
376 return ($exit_code,$report);
380 my ($record,$spamc,$spamc_opts) = @_;
381 my ($score,$threshold,$report,$exit_code);
382 ($exit_code,$report) =
383 spamc_bug($record,$spamc,[@{$spamc_opts},'-c']);
384 if (defined $report) {
385 ($score,$threshold) = $report =~ s{^(-?[\d\.]+)/(-?[\d\.]+)\n?}{};
387 return wantarray?($score,$exit_code,$report):$score;
390 sub spamc_learn_ham {
391 spamc_learn('ham',@_);
394 sub spamc_learn_forget {
395 spamc_learn('forget',@_);
398 sub spamc_learn_spam {
399 spamc_learn('spam',@_);
403 my ($type,$record,$spamc,$spamc_opts) = @_;
404 spamc_bug($record,$spamc,[@{$spamc_opts},'-L',$type])
408 my ($sub,$bug_num) = @_;
409 my $log = Debbugs::Log->new(bug_num => $bug_num) or
410 die "Unable to open bug log for $bug_num";
412 while (my $record = $log->read_record()) {
413 next if $record->{type} eq 'html';
414 next if $record->{type} eq 'autocheck';
415 my ($msg_id) = record_regex($record,
416 qr/^Message-Id:\s+<(.+)>/mi);
417 next unless defined $msg_id;
418 print STDERR "examining $msg_id: " if $DEBUG;
419 if ($msg_id =~ /$config{email_domain}$/) {
420 print STDERR "skipping\n" if $DEBUG;
423 if ($seen_msgids{$msg_id}) {
424 print STDERR "already seen\n" if $DEBUG;
427 $seen_msgids{$msg_id}=1;
428 $sub->($bug_num,$record,$msg_id);
429 print STDERR "\n" if $DEBUG;
437 # cperl-indent-level: 4
438 # indent-tabs-mode: nil