2 # debbugs-spam is part of debbugs, and is released
3 # under the terms of the GPL version 2, or any later version, at your
4 # option. See the file README and COPYING for more information.
5 # Copyright 2012 by Don Armstrong <don@donarmstrong.com>.
11 use Getopt::Long qw(:config no_ignore_case);
16 debbugs-spam -- Scan log files for spam and populate nnn.log.spam
20 debbugs-spam [options] bugnumber [[bugnumber2]..]
23 --spool-dir debbugs spool directory
24 --debug, -d debugging level (Default 0)
25 --help, -h display this help
26 --man, -m display manual
34 Debbugs spool directory; defaults to the value configured in the
35 debbugs configuration file.
43 Display brief useage information.
57 Automatically scan messages using spamassassin and mark messages as
58 spam which hit the threshold, and those that are highly negative as
63 Output the score of all of the messages in a bug
67 =item B<--skip-seen> Skip messages which have previously been classified
73 Mark messages as spam if there is a regex match to subject or message
78 Mark messages as ham if there is a regex match to subject or message
92 use Debbugs::Log qw(record_regex);
93 use Debbugs::Log::Spam;
94 use Debbugs::Config qw(:config);
95 use Debbugs::Command qw(:all);
96 use Debbugs::Common qw(getbuglocation);
97 use IPC::Open3 qw(open3);
110 handle_main_arguments(\%options,
114 'spamc_opts|spamc-opts=s@' => 0,
115 'spool_dir|spool-dir=s',
118 'debug|d+','help|h|?','man|m');
121 ('auto-scan' => {function => \&auto_spamscan,
122 arguments => {'ham_threshold|ham-threshold=s' => 0,
123 'skip_missing|skip-missing!' => 0,
125 defaults => {ham_threshold => -5,
129 'score' => {function => \&score_bug,
130 arguments => {'skip_seen|skip-seen!' => 0
133 'mark-spam' => {function => \&mark_spam,
135 'mark-ham' => {function => \&mark_ham,
137 'help' => {function => sub {pod2usage({verbose => 2});}}
140 pod2usage() if $options{help};
141 pod2usage({verbose=>2}) if $options{man};
143 $DEBUG = $options{debug};
146 $options{verbose} = $options{verbose} - $options{quiet};
148 pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
150 my ($subcommand) = shift @ARGV;
151 if (not defined $subcommand) {
152 $subcommand = 'help';
153 print STDERR "You must provide a subcommand; displaying usage.\n";
155 } elsif (not exists $subcommands{$subcommand}) {
156 print STDERR "$subcommand is not a valid subcommand; displaying usage.\n";
160 if (exists $options{spool_dir} and defined $options{spool_dir}) {
161 $config{spool_dir} = $options{spool_dir};
163 if ($subcommand ne 'help') {
164 chdir($config{spool_dir}) or die "chdir $config{spool_dir} failed: $!";
167 handle_subcommand_arguments(\@ARGV,
168 $subcommands{$subcommand}{arguments},
169 $subcommands{$subcommand}{defaults},
171 $subcommands{$subcommand}{function}->(\%options,$opts,\%config,\@ARGV);
183 my ($spam_ham,$options,$opts,$config,$argv) = @_;
184 my $regex = shift @{$argv};
185 for my $bug_num (@{$argv}) {
186 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
187 die "Unable to open bug log spam for $bug_num";
189 my ($bn,$rec,$mid) = @_;
190 my $body = $rec->{text};
191 my ($subject) = $body =~ /^Subject: *(.+)$/mi;
193 if ($subject =~ /\Q$regex\E/) {
196 if ($mid =~ /\Q$regex\E/) {
200 print STDERR "it's a match" if $DEBUG;
201 if ($spam_ham eq 'spam') {
202 $spam->add_spam($mid);
204 $spam->add_ham($mid);
216 my ($options,$opts,$config,$argv) = @_;
217 for my $bug_num (@{$argv}) {
219 spam_score_bug($bug_num,
221 $options->{spamc_opts},
224 print "$_->{score} $_->{message_id} $_->{subject}\n"
230 my ($options,$opts,$config,$argv) = @_;
232 for my $bug_num (@{$argv}) {
233 if ($opts->{skip_missing} and
234 not defined getbuglocation($bug_num,'log')) {
235 print STDERR "bug $bug_num does not exist\n" if $options->{verbose} > -1;
238 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
239 die "Unable to open bug log spam for $bug_num";
241 my ($bn,$rec,$mid) = @_;
242 if ($spam->is_spam($mid)) {
243 print STDERR "already spam\n" if $DEBUG;
246 if ($spam->is_ham($mid)) {
247 print STDERR "already ham\n" if $DEBUG;
250 my ($score,$is_spam,$report,$threshold) =
253 $options->{spamc_opts},
256 print STDERR "it's spam ($score)\n" if $DEBUG;
257 $spam->add_spam($mid);
258 } elsif ($score < $opts->{ham_threshold}) {
259 print STDERR "it's really ham ($score)\n" if $DEBUG;
260 $spam->add_ham($mid);
263 print STDERR "it's ham ($score)\n" if $DEBUG;
273 my ($bug,$spamc,$spamc_opts,$skip_seen) = @_;
277 $spam = Debbugs::Log::Spam->new(bug_num => $bug) or
278 die "Unable to open bug log spam for $bug";
282 my ($bn,$rec,$mid) = @_;
285 if ($spam->is_spam($mid)) {
287 } elsif ($spam->is_ham($mid)) {
292 spam_score($rec,$spamc,$spamc_opts);
293 my ($subject) = $rec->{text} =~ /^Subject: *(.+)$/mi;
306 my ($record,$spamc,$spamc_opts) = @_;
307 my ($score,$threshold,$report);
310 my ($spamc_in,$spamc_out);
311 my $old_sig = $SIG{"PIPE"};
313 die "SIGPIPE in child for some reason";
316 open3($spamc_in,$spamc_out,0,
317 $spamc,'-E',@{$spamc_opts}) or
318 die "Unable to fork spamc: $!";
320 die "Unable to fork spamc";
322 print {$spamc_in} $record->{text};
323 close($spamc_in) or die "Unable to close spamc_in: $!";
324 waitpid($childpid,0);
328 my ($first_line,@report) = <$spamc_out>;
330 print STDERR "[$?;".($? >> 8)."] ";
331 print STDERR $first_line,@report;
334 if (defined $first_line) {
336 ($score,$threshold) = $first_line =~ m{^(-?[\d\.]+)/(-?[\d\.]+)$};
337 $report = join('',@report);
340 $SIG{"PIPE"} = $old_sig;
343 carp "processing of message failed [$@]\n";
346 return wantarray?($score,$is_spam,$report):$score;
350 my ($sub,$bug_num) = @_;
351 my $log = Debbugs::Log->new(bug_num => $bug_num) or
352 die "Unable to open bug log for $bug_num";
354 while (my $record = $log->read_record()) {
355 next if $record->{type} eq 'html';
356 next if $record->{type} eq 'autocheck';
357 my ($msg_id) = record_regex($record,
358 qr/^Message-Id:\s+<(.+)>/mi);
359 next unless defined $msg_id;
360 print STDERR "examining $msg_id: " if $DEBUG;
361 if ($msg_id =~ /$config{email_domain}$/) {
362 print STDERR "skipping\n" if $DEBUG;
365 if ($seen_msgids{$msg_id}) {
366 print STDERR "already seen\n" if $DEBUG;
369 $seen_msgids{$msg_id}=1;
370 $sub->($bug_num,$record,$msg_id);
371 print STDERR "\n" if $DEBUG;
379 # cperl-indent-level: 4
380 # indent-tabs-mode: nil