2 # debbugs-spam is part of debbugs, and is released
3 # under the terms of the GPL version 2, or any later version, at your
4 # option. See the file README and COPYING for more information.
5 # Copyright 2012 by Don Armstrong <don@donarmstrong.com>.
11 use Getopt::Long qw(:config no_ignore_case);
16 debbugs-spam -- Scan log files for spam and populate nnn.log.spam
20 debbugs-spam [options] bugnumber [[bugnumber2]..]
23 --spool-dir debbugs spool directory
24 --debug, -d debugging level (Default 0)
25 --help, -h display this help
26 --man, -m display manual
34 Debbugs spool directory; defaults to the value configured in the
35 debbugs configuration file.
43 Display brief useage information.
57 Automatically scan messages using spamassassin and mark messages as
58 spam which hit the threshold, and those that are highly negative as
63 Output the score of all of the messages in a bug
67 =item B<--skip-seen> Skip messages which have previously been classified
73 Mark messages as spam if there is a regex match to subject or message
78 Mark messages as ham if there is a regex match to subject or message
83 Learn from messages which have been marked as spam/ham
96 use Debbugs::Log qw(record_regex);
97 use Debbugs::Log::Spam;
98 use Debbugs::Config qw(:config);
99 use Debbugs::Command qw(:all);
100 use Debbugs::Common qw(getbuglocation);
101 use IPC::Open3 qw(open3);
114 handle_main_arguments(\%options,
118 'spamc_opts|spamc-opts=s@' => 0,
119 'spool_dir|spool-dir=s',
122 'debug|d+','help|h|?','man|m');
125 ('auto-scan' => {function => \&auto_spamscan,
126 arguments => {'ham_threshold|ham-threshold=s' => 0,
127 'skip_missing|skip-missing!' => 0,
129 defaults => {ham_threshold => -5,
133 'score' => {function => \&score_bug,
134 arguments => {'skip_seen|skip-seen!' => 0
137 'mark-spam' => {function => \&mark_spam,
139 'mark-ham' => {function => \&mark_ham,
141 'learn' => {function => \&learn,
143 'help' => {function => sub {pod2usage({verbose => 2});}}
146 pod2usage() if $options{help};
147 pod2usage({verbose=>2}) if $options{man};
149 $DEBUG = $options{debug};
152 $options{verbose} = $options{verbose} - $options{quiet};
154 pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
156 my ($subcommand) = shift @ARGV;
157 if (not defined $subcommand) {
158 $subcommand = 'help';
159 print STDERR "You must provide a subcommand; displaying usage.\n";
161 } elsif (not exists $subcommands{$subcommand}) {
162 print STDERR "$subcommand is not a valid subcommand; displaying usage.\n";
166 if (exists $options{spool_dir} and defined $options{spool_dir}) {
167 $config{spool_dir} = $options{spool_dir};
169 if ($subcommand ne 'help') {
170 chdir($config{spool_dir}) or die "chdir $config{spool_dir} failed: $!";
173 handle_subcommand_arguments(\@ARGV,
174 $subcommands{$subcommand}{arguments},
175 $subcommands{$subcommand}{defaults},
177 $subcommands{$subcommand}{function}->(\%options,$opts,\%config,\@ARGV);
189 my ($spam_ham,$options,$opts,$config,$argv) = @_;
190 my $regex = shift @{$argv};
191 for my $bug_num (@{$argv}) {
192 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
193 die "Unable to open bug log spam for $bug_num";
195 my ($bn,$rec,$mid) = @_;
196 my $body = $rec->{text};
197 my ($subject) = $body =~ /^Subject: *(.+)$/mi;
199 if ($subject =~ /\Q$regex\E/) {
202 if ($mid =~ /\Q$regex\E/) {
206 print STDERR "it's a match" if $DEBUG;
207 if ($spam_ham eq 'spam') {
208 $spam->add_spam($mid);
210 $spam->add_ham($mid);
222 my ($options,$opts,$config,$argv) = @_;
223 for my $bug_num (@{$argv}) {
225 spam_score_bug($bug_num,
227 $options->{spamc_opts},
230 print "$_->{score} $_->{message_id} $_->{subject}\n"
236 my ($options,$opts,$config,$argv) = @_;
238 for my $bug_num (@{$argv}) {
239 if ($opts->{skip_missing} and
240 not defined getbuglocation($bug_num,'log')) {
241 print STDERR "bug $bug_num does not exist\n" if $options->{verbose} > -1;
244 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
245 die "Unable to open bug log spam for $bug_num";
247 my ($bn,$rec,$mid) = @_;
248 if ($spam->is_spam($mid)) {
249 spamc_learn_spam($rec,$options->{spamc},$options->{spamc_opts});
252 if ($spam->is_ham($mid)) {
253 spamc_learn_ham($rec,$options->{spamc},$options->{spamc_opts});
264 my ($options,$opts,$config,$argv) = @_;
266 for my $bug_num (@{$argv}) {
267 if ($opts->{skip_missing} and
268 not defined getbuglocation($bug_num,'log')) {
269 print STDERR "bug $bug_num does not exist\n" if $options->{verbose} > -1;
272 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
273 die "Unable to open bug log spam for $bug_num";
275 my ($bn,$rec,$mid) = @_;
276 if ($spam->is_spam($mid)) {
277 print STDERR "already spam\n" if $DEBUG;
280 if ($spam->is_ham($mid)) {
281 print STDERR "already ham\n" if $DEBUG;
284 my ($score,$is_spam,$report,$threshold) =
287 $options->{spamc_opts},
290 print STDERR "it's spam ($score)\n" if $DEBUG;
291 $spam->add_spam($mid);
292 } elsif ($score < $opts->{ham_threshold}) {
293 print STDERR "it's really ham ($score)\n" if $DEBUG;
294 $spam->add_ham($mid);
297 print STDERR "it's ham ($score)\n" if $DEBUG;
307 my ($bug,$spamc,$spamc_opts,$skip_seen) = @_;
311 $spam = Debbugs::Log::Spam->new(bug_num => $bug) or
312 die "Unable to open bug log spam for $bug";
316 my ($bn,$rec,$mid) = @_;
319 if ($spam->is_spam($mid)) {
321 } elsif ($spam->is_ham($mid)) {
326 spam_score($rec,$spamc,$spamc_opts);
327 my ($subject) = $rec->{text} =~ /^Subject: *(.+)$/mi;
340 my ($record,$spamc,$spamc_opts) = @_;
345 my ($spamc_in,$spamc_out);
346 my $old_sig = $SIG{"PIPE"};
348 die "SIGPIPE in child for some reason";
351 open3($spamc_in,$spamc_out,0,
352 $spamc,@{$spamc_opts}) or
353 die "Unable to fork spamc: $!";
355 die "Unable to fork spamc";
357 print {$spamc_in} $record->{text};
358 close($spamc_in) or die "Unable to close spamc_in: $!";
359 waitpid($childpid,0);
361 $exit_code = $? >> 8;
364 $report = <$spamc_out>;
366 $SIG{"PIPE"} = $old_sig;
369 carp "processing of message failed [$@]\n";
372 return ($exit_code,$report);
376 my ($record,$spamc,$spamc_opts) = @_;
377 my ($score,$threshold,$report,$exit_code);
378 ($exit_code,$report) =
379 spamc_bug($record,$spamc,[@{$spamc_opts},'-c']);
380 if (defined $report) {
381 ($score,$threshold) = $report =~ s{^(-?[\d\.]+)/(-?[\d\.]+)\n?}{};
383 return wantarray?($score,$exit_code,$report):$score;
386 sub spamc_learn_ham {
387 spamc_learn('ham',@_);
390 sub spamc_learn_forget {
391 spamc_learn('forget',@_);
394 sub spamc_learn_spam {
395 spamc_learn('spam',@_);
399 my ($type,$record,$spamc,$spamc_opts) = @_;
400 spamc_bug($record,$spamc,[@{$spamc_opts},'-L',$type])
404 my ($sub,$bug_num) = @_;
405 my $log = Debbugs::Log->new(bug_num => $bug_num) or
406 die "Unable to open bug log for $bug_num";
408 while (my $record = $log->read_record()) {
409 next if $record->{type} eq 'html';
410 next if $record->{type} eq 'autocheck';
411 my ($msg_id) = record_regex($record,
412 qr/^Message-Id:\s+<(.+)>/mi);
413 next unless defined $msg_id;
414 print STDERR "examining $msg_id: " if $DEBUG;
415 if ($msg_id =~ /$config{email_domain}$/) {
416 print STDERR "skipping\n" if $DEBUG;
419 if ($seen_msgids{$msg_id}) {
420 print STDERR "already seen\n" if $DEBUG;
423 $seen_msgids{$msg_id}=1;
424 $sub->($bug_num,$record,$msg_id);
425 print STDERR "\n" if $DEBUG;
433 # cperl-indent-level: 4
434 # indent-tabs-mode: nil