2 # debbugs-spam is part of debbugs, and is released
3 # under the terms of the GPL version 2, or any later version, at your
4 # option. See the file README and COPYING for more information.
5 # Copyright 2012 by Don Armstrong <don@donarmstrong.com>.
11 use Getopt::Long qw(:config no_ignore_case);
16 debbugs-spam -- Scan log files for spam and populate nnn.log.spam
20 debbugs-spam [options] bugnumber [[bugnumber2]..]
23 --spool-dir debbugs spool directory
24 --debug, -d debugging level (Default 0)
25 --help, -h display this help
26 --man, -m display manual
34 Debbugs spool directory; defaults to the value configured in the
35 debbugs configuration file.
43 Display brief useage information.
57 Automatically scan messages using spamassassin and mark messages as
58 spam which hit the threshold, and those that are highly negative as
63 Output the score of all of the messages in a bug
67 =item B<--skip-seen> Skip messages which have previously been classified
73 Mark messages as spam if there is a regex match to subject or message
78 Mark messages as ham if there is a regex match to subject or message
83 Learn from messages which are ham/spam
92 /usr/sbin/spamd --socketpath=/home/debbugs/spamd_socket \
93 --nouser-config --cf='include /home/debbugs/.spamassassin/user_prefs' \
94 --cf='allow_user_rules 1' --allow-tell;
98 debbugs-spam --spamc-opts '-U' --spamc-opts '/home/debbugs/spamd_socket' \
106 use Debbugs::Log qw(record_regex);
107 use Debbugs::Log::Spam;
108 use Debbugs::Config qw(:config);
109 use Debbugs::Command qw(:all);
110 use IPC::Open3 qw(open3);
124 handle_main_arguments(\%options,
129 'spamc_opts|spamc-opts=s@' => 0,
130 'spool_dir|spool-dir=s',
131 'debug|d+','help|h|?','man|m');
134 ('auto-scan' => {function => \&auto_spamscan,
135 arguments => {'ham_threshold|ham-threshold=s' => 0,
137 defaults => {ham_threshold => -5},
139 'score' => {function => \&score_bug,
140 arguments => {'skip_seen|skip-seen!' => 0
143 'mark-spam' => {function => \&mark_spam,
145 'mark-ham' => {function => \&mark_ham,
147 'learn' => {fuction => \&learn,
149 'help' => {function => sub {pod2usage({verbose => 2});}}
152 pod2usage() if $options{help};
153 pod2usage({verbose=>2}) if $options{man};
155 $DEBUG = $options{debug};
158 $options{verbose} = $options{verbose} - $options{quiet};
160 pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
162 my ($subcommand) = shift @ARGV;
163 if (not defined $subcommand) {
164 $subcommand = 'help';
165 print STDERR "You must provide a subcommand; displaying usage.\n";
167 } elsif (not exists $subcommands{$subcommand}) {
168 print STDERR "$subcommand is not a valid subcommand; displaying usage.\n";
172 if (exists $options{spool_dir} and defined $options{spool_dir}) {
173 $config{spool_dir} = $options{spool_dir};
175 if ($subcommand ne 'help') {
176 chdir($config{spool_dir}) or die "chdir $config{spool_dir} failed: $!";
179 handle_subcommand_arguments(\@ARGV,
180 $subcommands{$subcommand}{arguments},
181 $subcommands{$subcommand}{defaults},
183 $subcommands{$subcommand}{function}->(\%options,$opts,\%config,\@ARGV);
195 my ($spam_ham,$options,$opts,$config,$argv) = @_;
196 my $regex = shift @{$argv};
197 for my $bug_num (@{$argv}) {
198 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
199 die "Unable to open bug log spam for $bug_num";
201 my ($bn,$rec,$mid) = @_;
202 my $body = $rec->{text};
203 my ($subject) = $body =~ /^Subject: *(.+)$/mi;
205 if ($subject =~ /\Q$regex\E/) {
208 if ($mid =~ /\Q$regex\E/) {
212 print STDERR "it's a match" if $DEBUG;
213 if ($spam_ham eq 'spam') {
214 $spam->add_spam($mid);
216 $spam->add_ham($mid);
227 my ($options,$opts,$config,$argv) = @_;
228 for my $bug_num (@{$argv}) {
229 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
230 die "Unable to open bug log spam for $bug_num";
232 my ($bn,$rec,$mid) = @_;
234 if ($spam->is_spam($mid)) {
236 spam_score($rec,$options->{spamc},
237 [@{$options->{spamc_opts}},
241 print STDERR "learning spam" if $DEBUG;
242 } elsif ($spam->is_ham($mid)) {
244 spam_score($rec,$options->{spamc},
245 [@{$options->{spamc_opts}},
249 print STDERR "learning ham" if $DEBUG;
251 print STDERR "not learning" if $DEBUG;
253 print STDERR " from $mid" if $DEBUG;
262 my ($options,$opts,$config,$argv) = @_;
263 for my $bug_num (@{$argv}) {
265 spam_score_bug($bug_num,
267 $options->{spamc_opts},
270 print "$_->{score} $_->{message_id} $_->{subject}\n"
276 my ($options,$opts,$config,$argv) = @_;
278 for my $bug_num (@{$argv}) {
279 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
280 die "Unable to open bug log spam for $bug_num";
282 my ($bn,$rec,$mid) = @_;
283 if ($spam->is_spam($mid)) {
284 print STDERR "already spam\n" if $DEBUG;
287 if ($spam->is_ham($mid)) {
288 print STDERR "already ham\n" if $DEBUG;
291 my ($score,$is_spam,$report,$threshold) =
294 $options->{spamc_opts},
297 print STDERR "it's spam ($score)\n" if $DEBUG;
298 $spam->add_spam($mid);
299 } elsif ($score < $opts->{ham_threshold}) {
300 print STDERR "it's really ham ($score)\n" if $DEBUG;
301 $spam->add_ham($mid);
304 print STDERR "it's ham ($score)\n" if $DEBUG;
314 my ($bug,$spamc,$spamc_opts,$skip_seen) = @_;
318 $spam = Debbugs::Log::Spam->new(bug_num => $bug) or
319 die "Unable to open bug log spam for $bug";
323 my ($bn,$rec,$mid) = @_;
326 if ($spam->is_spam($mid)) {
328 } elsif ($spam->is_ham($mid)) {
333 spam_score($rec,$spamc,$spamc_opts);
334 my ($subject) = $rec->{text} =~ /^Subject: *(.+)$/mi;
347 my ($record,$spamc,$spamc_opts) = @_;
348 my ($score,$threshold,$report);
351 my ($spamc_in,$spamc_out);
352 my $old_sig = $SIG{"PIPE"};
354 die "SIGPIPE in child for some reason";
357 open3($spamc_in,$spamc_out,0,
358 $spamc,'-E',@{$spamc_opts}) or
359 die "Unable to fork spamc: $!";
361 die "Unable to fork spamc";
363 print {$spamc_in} $record->{text};
364 close($spamc_in) or die "Unable to close spamc_in: $!";
365 waitpid($childpid,0);
369 my ($first_line,@report) = <$spamc_out>;
371 print STDERR "[$?;".($? >> 8)."] ";
372 print STDERR $first_line,@report;
375 if (defined $first_line) {
377 ($score,$threshold) = $first_line =~ m{^(-?[\d\.]+)/(-?[\d\.]+)$};
378 $report = join('',@report);
381 $SIG{"PIPE"} = $old_sig;
384 carp "processing of message failed [$@]\n";
387 return wantarray?($score,$is_spam,$report):$score;
391 my ($sub,$bug_num) = @_;
392 my $log = Debbugs::Log->new(bug_num => $bug_num) or
393 die "Unable to open bug log for $bug_num";
395 while (my $record = $log->read_record()) {
396 next if $record->{type} eq 'html';
397 next if $record->{type} eq 'autocheck';
398 my ($msg_id) = record_regex($record,
399 qr/^Message-Id:\s+<(.+)>/mi);
400 next unless defined $msg_id;
401 print STDERR "examining $msg_id: " if $DEBUG;
402 if ($msg_id =~ /$config{email_domain}$/) {
403 print STDERR "skipping\n" if $DEBUG;
406 if ($seen_msgids{$msg_id}) {
407 print STDERR "already seen\n" if $DEBUG;
410 $seen_msgids{$msg_id}=1;
411 $sub->($bug_num,$record,$msg_id);
412 print STDERR "\n" if $DEBUG;
420 # cperl-indent-level: 4
421 # indent-tabs-mode: nil