2 # debbugs-spam is part of debbugs, and is released
3 # under the terms of the GPL version 2, or any later version, at your
4 # option. See the file README and COPYING for more information.
5 # Copyright 2012 by Don Armstrong <don@donarmstrong.com>.
11 use Getopt::Long qw(:config no_ignore_case);
16 debbugs-spam -- Scan log files for spam and populate nnn.log.spam
20 debbugs-spam [options] bugnumber [[bugnumber2]..]
23 --spool-dir debbugs spool directory
24 --debug, -d debugging level (Default 0)
25 --help, -h display this help
26 --man, -m display manual
34 Debbugs spool directory; defaults to the value configured in the
35 debbugs configuration file.
43 Display brief useage information.
57 Automatically scan messages using spamassassin and mark messages as
58 spam which hit the threshold, and those that are highly negative as
63 Output the score of all of the messages in a bug
67 =item B<--skip-seen> Skip messages which have previously been classified
73 Mark messages as spam if there is a regex match to subject or message
78 Mark messages as ham if there is a regex match to subject or message
92 use Debbugs::Log qw(record_regex);
93 use Debbugs::Log::Spam;
94 use Debbugs::Config qw(:config);
95 use Debbugs::Command qw(:all);
96 use IPC::Open3 qw(open3);
110 handle_main_arguments(\%options,
115 'spamc_opts|spamc-opts=s@' => 0,
116 'spool_dir|spool-dir=s',
117 'debug|d+','help|h|?','man|m');
120 ('auto-scan' => {function => \&auto_spamscan,
121 arguments => {'ham_threshold|ham-threshold=s' => 0,
123 defaults => {ham_threshold => -5},
125 'score' => {function => \&score_bug,
126 arguments => {'skip_seen|skip-seen!' => 0
129 'mark-spam' => {function => \&mark_spam,
131 'mark-ham' => {function => \&mark_ham,
133 'help' => {function => sub {pod2usage({verbose => 2});}}
136 pod2usage() if $options{help};
137 pod2usage({verbose=>2}) if $options{man};
139 $DEBUG = $options{debug};
142 $options{verbose} = $options{verbose} - $options{quiet};
144 pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
146 my ($subcommand) = shift @ARGV;
147 if (not defined $subcommand) {
148 $subcommand = 'help';
149 print STDERR "You must provide a subcommand; displaying usage.\n";
151 } elsif (not exists $subcommands{$subcommand}) {
152 print STDERR "$subcommand is not a valid subcommand; displaying usage.\n";
156 if (exists $options{spool_dir} and defined $options{spool_dir}) {
157 $config{spool_dir} = $options{spool_dir};
159 if ($subcommand ne 'help') {
160 chdir($config{spool_dir}) or die "chdir $config{spool_dir} failed: $!";
163 handle_subcommand_arguments(\@ARGV,
164 $subcommands{$subcommand}{arguments},
165 $subcommands{$subcommand}{defaults},
167 $subcommands{$subcommand}{function}->(\%options,$opts,\%config,\@ARGV);
179 my ($spam_ham,$options,$opts,$config,$argv) = @_;
180 my $regex = shift @{$argv};
181 for my $bug_num (@{$argv}) {
182 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
183 die "Unable to open bug log spam for $bug_num";
185 my ($bn,$rec,$mid) = @_;
186 my $body = $rec->{text};
187 my ($subject) = $body =~ /^Subject: *(.+)$/mi;
189 if ($subject =~ /\Q$regex\E/) {
192 if ($mid =~ /\Q$regex\E/) {
195 if ($spam_ham eq 'spam') {
196 $spam->add_spam($mid);
198 $spam->add_ham($mid);
208 my ($options,$opts,$config,$argv) = @_;
209 for my $bug_num (@{$argv}) {
211 spam_score_bug($bug_num,
213 $options->{spamc_opts},
216 print "$_->{score} $_->{message_id} $_->{subject}\n"
222 my ($options,$opts,$config,$argv) = @_;
224 for my $bug_num (@{$argv}) {
225 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
226 die "Unable to open bug log spam for $bug_num";
228 my ($bn,$rec,$mid) = @_;
229 if ($spam->is_spam($mid)) {
230 print STDERR "already spam\n" if $DEBUG;
233 if ($spam->is_ham($mid)) {
234 print STDERR "already ham\n" if $DEBUG;
237 my ($score,$is_spam,$report,$threshold) =
240 $options->{spamc_opts},
243 print STDERR "it's spam ($score)\n" if $DEBUG;
244 $spam->add_spam($mid);
245 } elsif ($score < $opts->{ham_threshold}) {
246 print STDERR "it's really ham ($score)\n" if $DEBUG;
247 $spam->add_ham($mid);
250 print STDERR "it's ham ($score)\n" if $DEBUG;
260 my ($bug,$spamc,$spamc_opts,$skip_seen) = @_;
264 $spam = Debbugs::Log::Spam->new(bug_num => $bug) or
265 die "Unable to open bug log spam for $bug";
269 my ($bn,$rec,$mid) = @_;
272 if ($spam->is_spam($mid)) {
274 } elsif ($spam->is_ham($mid)) {
279 spam_score($rec,$spamc,$spamc_opts);
280 my ($subject) = $rec->{text} =~ /^Subject: *(.+)$/mi;
293 my ($record,$spamc,$spamc_opts) = @_;
294 my ($score,$threshold,$report);
297 my ($spamc_in,$spamc_out);
298 my $old_sig = $SIG{"PIPE"};
300 die "SIGPIPE in child for some reason";
303 open3($spamc_in,$spamc_out,0,
304 $spamc,'-E',@{$spamc_opts}) or
305 die "Unable to fork spamc: $!";
307 die "Unable to fork spamc";
309 print {$spamc_in} $record->{text};
310 close($spamc_in) or die "Unable to close spamc_in: $!";
311 waitpid($childpid,0);
315 my ($first_line,@report) = <$spamc_out>;
317 print STDERR "[$?;".($? >> 8)."] ";
318 print STDERR $first_line,@report;
321 if (defined $first_line) {
323 ($score,$threshold) = $first_line =~ m{^(-?[\d\.]+)/(-?[\d\.]+)$};
324 $report = join('',@report);
327 $SIG{"PIPE"} = $old_sig;
330 carp "processing of message failed [$@]\n";
333 return wantarray?($score,$is_spam,$report):$score;
337 my ($sub,$bug_num) = @_;
338 my $log = Debbugs::Log->new(bug_num => $bug_num) or
339 die "Unable to open bug log for $bug_num";
341 while (my $record = $log->read_record()) {
342 next if $record->{type} eq 'html';
343 next if $record->{type} eq 'autocheck';
344 my ($msg_id) = record_regex($record,
345 qr/^Message-Id:\s+<(.+)>/mi);
346 next unless defined $msg_id;
347 print STDERR "examining $msg_id: " if $DEBUG;
348 if ($msg_id =~ /$config{email_domain}$/) {
349 print STDERR "skipping\n" if $DEBUG;
352 if ($seen_msgids{$msg_id}) {
353 print STDERR "already seen\n" if $DEBUG;
356 $seen_msgids{$msg_id}=1;
357 $sub->($bug_num,$record,$msg_id);
358 print STDERR "\n" if $DEBUG;
366 # cperl-indent-level: 4
367 # indent-tabs-mode: nil