2 # debbugs-spam is part of debbugs, and is released
3 # under the terms of the GPL version 2, or any later version, at your
4 # option. See the file README and COPYING for more information.
5 # Copyright 2012 by Don Armstrong <don@donarmstrong.com>.
11 use Getopt::Long qw(:config no_ignore_case);
16 debbugs-spam -- Scan log files for spam and populate nnn.log.spam
20 debbugs-spam [options] bugnumber [[bugnumber2]..]
23 --spool-dir debbugs spool directory
24 --debug, -d debugging level (Default 0)
25 --help, -h display this help
26 --man, -m display manual
34 Debbugs spool directory; defaults to the value configured in the
35 debbugs configuration file.
43 Display brief useage information.
57 Automatically scan messages using spamassassin and mark messages as
58 spam which hit the threshold, and those that are highly negative as
63 Output the score of all of the messages in a bug
67 =item B<--skip-seen> Skip messages which have previously been classified
73 Mark messages as spam if there is a regex match to subject or message
78 Mark messages as ham if there is a regex match to subject or message
92 use Debbugs::Log qw(record_regex);
93 use Debbugs::Log::Spam;
94 use Debbugs::Config qw(:config);
95 use Debbugs::Command qw(:all);
96 use IPC::Open3 qw(open3);
110 handle_main_arguments(\%options,
115 'spamc_opts|spamc-opts=s@' => 0,
116 'spool_dir|spool-dir=s',
117 'debug|d+','help|h|?','man|m');
120 ('auto-scan' => {function => \&auto_spamscan,
121 arguments => {'ham_threshold=s' => -5,
124 'score' => {function => \&score_bug,
125 arguments => {'skip_seen|skip-seen!' => 0
128 'mark-spam' => {function => \&mark_spam,
130 'mark-ham' => {function => \&mark_ham,
132 'help' => {function => sub {pod2usage({verbose => 2});}}
135 pod2usage() if $options{help};
136 pod2usage({verbose=>2}) if $options{man};
138 $DEBUG = $options{debug};
141 $options{verbose} = $options{verbose} - $options{quiet};
143 pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
145 my ($subcommand) = shift @ARGV;
146 if (not defined $subcommand) {
147 $subcommand = 'help';
148 print STDERR "You must provide a subcommand; displaying usage.\n";
150 } elsif (not exists $subcommands{$subcommand}) {
151 print STDERR "$subcommand is not a valid subcommand; displaying usage.\n";
155 if (exists $options{spool_dir} and defined $options{spool_dir}) {
156 $config{spool_dir} = $options{spool_dir};
158 if ($subcommand ne 'help') {
159 chdir($config{spool_dir}) or die "chdir $config{spool_dir} failed: $!";
162 handle_subcommand_arguments(\@ARGV,$subcommands{$subcommand}{arguments});
163 $subcommands{$subcommand}{function}->(\%options,$opts,\%config,\@ARGV);
175 my ($spam_ham,$options,$opts,$config,$argv) = @_;
176 my $regex = shift @{$argv};
177 for my $bug_num (@{$argv}) {
178 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
179 die "Unable to open bug log spam for $bug_num";
181 my ($bn,$rec,$mid) = @_;
182 my $body = $rec->{text};
183 my ($subject) = $body =~ /^Subject: *(.+)$/mi;
185 if ($subject =~ /\Q$regex\E/) {
188 if ($mid =~ /\Q$regex\E/) {
191 if ($spam_ham eq 'spam') {
192 $spam->add_spam($mid);
194 $spam->add_ham($mid);
204 my ($options,$opts,$config,$argv) = @_;
205 for my $bug_num (@{$argv}) {
207 spam_score_bug($bug_num,
209 $options->{spamc_opts},
212 print "$_->{score} $_->{message_id} $_->{subject}\n"
218 my ($options,$opts,$config,$argv) = @_;
220 for my $bug_num (@{$argv}) {
221 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
222 die "Unable to open bug log spam for $bug_num";
224 my ($bn,$rec,$mid) = @_;
225 if ($spam->is_spam($mid)) {
226 print STDERR "already spam\n" if $DEBUG;
229 if ($spam->is_ham($mid)) {
230 print STDERR "already ham\n" if $DEBUG;
233 my ($score,$is_spam,$report,$threshold) =
236 $options->{spamc_opts},
239 print STDERR "it's spam ($score)\n" if $DEBUG;
240 $spam->add_spam($mid);
241 } elsif ($score < $options->{ham_threshold}) {
242 print STDERR "it's really ham ($score)\n" if $DEBUG;
243 $spam->add_ham($mid);
246 print STDERR "it's ham ($score)\n" if $DEBUG;
256 my ($bug,$spamc,$spamc_opts,$skip_seen) = @_;
260 $spam = Debbugs::Log::Spam->new(bug_num => $bug) or
261 die "Unable to open bug log spam for $bug";
265 my ($bn,$rec,$mid) = @_;
268 if ($spam->is_spam($mid)) {
270 } elsif ($spam->is_ham($mid)) {
275 spam_score($rec,$spamc,$spamc_opts);
276 my ($subject) = $rec->{text} =~ /^Subject: *(.+)$/mi;
289 my ($record,$spamc,$spamc_opts) = @_;
290 my ($score,$threshold,$report);
293 my ($spamc_in,$spamc_out);
294 my $old_sig = $SIG{"PIPE"};
296 die "SIGPIPE in child for some reason";
299 open3($spamc_in,$spamc_out,0,
300 $spamc,'-E',@{$spamc_opts}) or
301 die "Unable to fork spamc: $!";
303 die "Unable to fork spamc";
305 print {$spamc_in} $record->{text};
306 close($spamc_in) or die "Unable to close spamc_in: $!";
307 waitpid($childpid,0);
311 my ($first_line,@report) = <$spamc_out>;
313 print STDERR "[$?;".($? >> 8)."] ";
314 print STDERR $first_line,@report;
317 if (defined $first_line) {
319 ($score,$threshold) = $first_line =~ m{^(-?[\d\.]+)/(-?[\d\.]+)$};
320 $report = join('',@report);
323 $SIG{"PIPE"} = $old_sig;
326 carp "processing of message failed [$@]\n";
329 return wantarray?($score,$is_spam,$report):$score;
333 my ($sub,$bug_num) = @_;
334 my $log = Debbugs::Log->new(bug_num => $bug_num) or
335 die "Unable to open bug log for $bug_num";
337 while (my $record = $log->read_record()) {
338 next if $record->{type} eq 'html';
339 next if $record->{type} eq 'autocheck';
340 my ($msg_id) = record_regex($record,
341 qr/^Message-Id:\s+<(.+)>/mi);
342 next unless defined $msg_id;
343 if ($msg_id =~ /$config{email_domain}$/) {
344 print STDERR "skipping $msg_id\n" if $DEBUG;
347 print STDERR "examining $msg_id: " if $DEBUG;
348 if ($seen_msgids{$msg_id}) {
349 print STDERR "already seen\n" if $DEBUG;
352 $seen_msgids{$msg_id}=1;
353 $sub->($bug_num,$record,$msg_id);
361 # cperl-indent-level: 4
362 # indent-tabs-mode: nil