2 # debbugs-spam is part of debbugs, and is released
3 # under the terms of the GPL version 2, or any later version, at your
4 # option. See the file README and COPYING for more information.
5 # Copyright 2012 by Don Armstrong <don@donarmstrong.com>.
11 use Getopt::Long qw(:config no_ignore_case);
16 debbugs-spam -- Scan log files for spam and populate nnn.log.spam
20 debbugs-spam [options] bugnumber [[bugnumber2]..]
23 --spool-dir debbugs spool directory
24 --debug, -d debugging level (Default 0)
25 --help, -h display this help
26 --man, -m display manual
34 Debbugs spool directory; defaults to the value configured in the
35 debbugs configuration file.
43 Display brief useage information.
57 Automatically scan messages using spamassassin and mark messages as
58 spam which hit the threshold, and those that are highly negative as
63 Output the score of all of the messages in a bug
67 =item B<--skip-seen> Skip messages which have previously been classified
73 Mark messages as spam if there is a regex match to subject or message
78 Mark messages as ham if there is a regex match to subject or message
92 use Debbugs::Log qw(record_regex);
93 use Debbugs::Log::Spam;
94 use Debbugs::Config qw(:config);
95 use Debbugs::Command qw(:all);
96 use IPC::Open3 qw(open3);
110 handle_main_arguments(\%options,
115 'spamc_opts|spamc-opts=s@' => 0,
116 'spool_dir|spool-dir=s',
117 'debug|d+','help|h|?','man|m');
120 ('auto-scan' => {function => \&auto_spamscan,
121 arguments => {'ham_threshold|ham-threshold=s' => 0,
123 defaults => {ham_threshold => -5},
125 'score' => {function => \&score_bug,
126 arguments => {'skip_seen|skip-seen!' => 0
129 'mark-spam' => {function => \&mark_spam,
131 'mark-ham' => {function => \&mark_ham,
133 'help' => {function => sub {pod2usage({verbose => 2});}}
136 pod2usage() if $options{help};
137 pod2usage({verbose=>2}) if $options{man};
139 $DEBUG = $options{debug};
142 $options{verbose} = $options{verbose} - $options{quiet};
144 pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
146 my ($subcommand) = shift @ARGV;
147 if (not defined $subcommand) {
148 $subcommand = 'help';
149 print STDERR "You must provide a subcommand; displaying usage.\n";
151 } elsif (not exists $subcommands{$subcommand}) {
152 print STDERR "$subcommand is not a valid subcommand; displaying usage.\n";
156 if (exists $options{spool_dir} and defined $options{spool_dir}) {
157 $config{spool_dir} = $options{spool_dir};
159 if ($subcommand ne 'help') {
160 chdir($config{spool_dir}) or die "chdir $config{spool_dir} failed: $!";
163 handle_subcommand_arguments(\@ARGV,
164 $subcommands{$subcommand}{arguments},
165 $subcommands{$subcommand}{defaults},
167 $subcommands{$subcommand}{function}->(\%options,$opts,\%config,\@ARGV);
179 my ($spam_ham,$options,$opts,$config,$argv) = @_;
180 my $regex = shift @{$argv};
181 for my $bug_num (@{$argv}) {
182 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
183 die "Unable to open bug log spam for $bug_num";
185 my ($bn,$rec,$mid) = @_;
186 my $body = $rec->{text};
187 my ($subject) = $body =~ /^Subject: *(.+)$/mi;
189 if ($subject =~ /\Q$regex\E/) {
192 if ($mid =~ /\Q$regex\E/) {
196 print STDERR "it's a match" if $DEBUG;
197 if ($spam_ham eq 'spam') {
198 $spam->add_spam($mid);
200 $spam->add_ham($mid);
212 my ($options,$opts,$config,$argv) = @_;
213 for my $bug_num (@{$argv}) {
215 spam_score_bug($bug_num,
217 $options->{spamc_opts},
220 print "$_->{score} $_->{message_id} $_->{subject}\n"
226 my ($options,$opts,$config,$argv) = @_;
228 for my $bug_num (@{$argv}) {
229 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
230 die "Unable to open bug log spam for $bug_num";
232 my ($bn,$rec,$mid) = @_;
233 if ($spam->is_spam($mid)) {
234 print STDERR "already spam\n" if $DEBUG;
237 if ($spam->is_ham($mid)) {
238 print STDERR "already ham\n" if $DEBUG;
241 my ($score,$is_spam,$report,$threshold) =
244 $options->{spamc_opts},
247 print STDERR "it's spam ($score)\n" if $DEBUG;
248 $spam->add_spam($mid);
249 } elsif ($score < $opts->{ham_threshold}) {
250 print STDERR "it's really ham ($score)\n" if $DEBUG;
251 $spam->add_ham($mid);
254 print STDERR "it's ham ($score)\n" if $DEBUG;
264 my ($bug,$spamc,$spamc_opts,$skip_seen) = @_;
268 $spam = Debbugs::Log::Spam->new(bug_num => $bug) or
269 die "Unable to open bug log spam for $bug";
273 my ($bn,$rec,$mid) = @_;
276 if ($spam->is_spam($mid)) {
278 } elsif ($spam->is_ham($mid)) {
283 spam_score($rec,$spamc,$spamc_opts);
284 my ($subject) = $rec->{text} =~ /^Subject: *(.+)$/mi;
297 my ($record,$spamc,$spamc_opts) = @_;
298 my ($score,$threshold,$report);
301 my ($spamc_in,$spamc_out);
302 my $old_sig = $SIG{"PIPE"};
304 die "SIGPIPE in child for some reason";
307 open3($spamc_in,$spamc_out,0,
308 $spamc,'-E',@{$spamc_opts}) or
309 die "Unable to fork spamc: $!";
311 die "Unable to fork spamc";
313 print {$spamc_in} $record->{text};
314 close($spamc_in) or die "Unable to close spamc_in: $!";
315 waitpid($childpid,0);
319 my ($first_line,@report) = <$spamc_out>;
321 print STDERR "[$?;".($? >> 8)."] ";
322 print STDERR $first_line,@report;
325 if (defined $first_line) {
327 ($score,$threshold) = $first_line =~ m{^(-?[\d\.]+)/(-?[\d\.]+)$};
328 $report = join('',@report);
331 $SIG{"PIPE"} = $old_sig;
334 carp "processing of message failed [$@]\n";
337 return wantarray?($score,$is_spam,$report):$score;
341 my ($sub,$bug_num) = @_;
342 my $log = Debbugs::Log->new(bug_num => $bug_num) or
343 die "Unable to open bug log for $bug_num";
345 while (my $record = $log->read_record()) {
346 next if $record->{type} eq 'html';
347 next if $record->{type} eq 'autocheck';
348 my ($msg_id) = record_regex($record,
349 qr/^Message-Id:\s+<(.+)>/mi);
350 next unless defined $msg_id;
351 print STDERR "examining $msg_id: " if $DEBUG;
352 if ($msg_id =~ /$config{email_domain}$/) {
353 print STDERR "skipping\n" if $DEBUG;
356 if ($seen_msgids{$msg_id}) {
357 print STDERR "already seen\n" if $DEBUG;
360 $seen_msgids{$msg_id}=1;
361 $sub->($bug_num,$record,$msg_id);
362 print STDERR "\n" if $DEBUG;
370 # cperl-indent-level: 4
371 # indent-tabs-mode: nil