2 # debbugs-spam is part of debbugs, and is released
3 # under the terms of the GPL version 2, or any later version, at your
4 # option. See the file README and COPYING for more information.
5 # Copyright 2012 by Don Armstrong <don@donarmstrong.com>.
11 use Getopt::Long qw(:config no_ignore_case);
16 debbugs-spam -- Scan log files for spam and populate nnn.log.spam
20 debbugs-spam [options] bugnumber [[bugnumber2]..]
23 --spool-dir debbugs spool directory
24 --debug, -d debugging level (Default 0)
25 --help, -h display this help
26 --man, -m display manual
34 Debbugs spool directory; defaults to the value configured in the
35 debbugs configuration file.
43 Display brief useage information.
57 Automatically scan messages using spamassassin and mark messages as
58 spam which hit the threshold, and those that are highly negative as
63 Output the score of all of the messages in a bug
67 Mark messages as spam if there is a regex match to subject or message
72 Mark messages as ham if there is a regex match to subject or message
86 use Debbugs::Log qw(record_regex);
87 use Debbugs::Log::Spam;
88 use Debbugs::Config qw(:config);
89 use Debbugs::Command qw(:all);
90 use IPC::Open3 qw(open3);
104 handle_main_arguments(\%options,
109 'spamc_opts|spamc-opts=s@' => 0,
110 'spool_dir|spool-dir=s',
111 'debug|d+','help|h|?','man|m');
114 ('auto-scan' => {function => \&auto_spamscan,
115 arguments => {'ham_threshold=s' => -5,
118 'score' => {function => \&score_bug,
120 'mark-spam' => {function => \&mark_spam,
122 'mark-ham' => {function => \&mark_ham,
124 'help' => {function => sub {pod2usage({verbose => 2});}}
127 pod2usage() if $options{help};
128 pod2usage({verbose=>2}) if $options{man};
130 $DEBUG = $options{debug};
133 $options{verbose} = $options{verbose} - $options{quiet};
135 pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
137 my ($subcommand) = shift @ARGV;
138 if (not defined $subcommand) {
139 $subcommand = 'help';
140 print STDERR "You must provide a subcommand; displaying usage.\n";
142 } elsif (not exists $subcommands{$subcommand}) {
143 print STDERR "$subcommand is not a valid subcommand; displaying usage.\n";
147 if (exists $options{spool_dir} and defined $options{spool_dir}) {
148 $config{spool_dir} = $options{spool_dir};
150 if ($subcommand ne 'help') {
151 chdir($config{spool_dir}) or die "chdir $config{spool_dir} failed: $!";
154 handle_subcommand_arguments(\@ARGV,$subcommands{$subcommand}{arguments});
155 $subcommands{$subcommand}{function}->(\%options,$opts,\%config,\@ARGV);
167 my ($spam_ham,$options,$opts,$config,$argv) = @_;
168 my $regex = shift @{$argv};
169 for my $bug_num (@{$argv}) {
170 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
171 die "Unable to open bug log spam for $bug_num";
173 my ($bn,$rec,$mid) = @_;
174 my $body = $rec->{text};
175 my ($subject) = $body =~ /^Subject: *(.+)$/mi;
177 if ($subject =~ /\Q$regex\E/) {
180 if ($mid =~ /\Q$regex\E/) {
183 if ($spam_ham eq 'spam') {
184 $spam->add_spam($mid);
186 $spam->add_ham($mid);
196 my ($options,$opts,$config,$argv) = @_;
197 for my $bug_num (@{$argv}) {
199 spam_score_bug($bug_num,
201 $options->{spamc_opts});
202 print "$_->{score} $_->{message_id} $_->{subject}\n"
208 my ($options,$opts,$config,$argv) = @_;
210 for my $bug_num (@{$argv}) {
211 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
212 die "Unable to open bug log spam for $bug_num";
214 my ($bn,$rec,$mid) = @_;
215 if ($spam->is_spam($mid)) {
216 print STDERR "already spam\n" if $DEBUG;
219 if ($spam->is_ham($mid)) {
220 print STDERR "already ham\n" if $DEBUG;
223 my ($score,$is_spam,$report,$threshold) =
226 $options->{spamc_opts},
229 print STDERR "it's spam ($score)\n" if $DEBUG;
230 $spam->add_spam($mid);
231 } elsif ($score < $options->{ham_threshold}) {
232 print STDERR "it's really ham ($score)\n" if $DEBUG;
233 $spam->add_ham($mid);
236 print STDERR "it's ham ($score)\n" if $DEBUG;
246 my ($bug,$spamc,$spamc_opts) = @_;
250 my ($bn,$rec,$mid) = @_;
252 spam_score($rec,$spamc,$spamc_opts);
256 subject => ($rec->{text} =~ /^Subject: *(.+)/i)[0],
265 my ($record,$spamc,$spamc_opts) = @_;
266 my ($score,$threshold,$report);
269 my ($spamc_in,$spamc_out);
270 my $old_sig = $SIG{"PIPE"};
272 die "SIGPIPE in child for some reason";
275 open3($spamc_in,$spamc_out,0,
276 $spamc,'-E',@{$spamc_opts}) or
277 die "Unable to fork spamc: $!";
279 die "Unable to fork spamc";
281 print {$spamc_in} $record->{text};
282 close($spamc_in) or die "Unable to close spamc_in: $!";
283 waitpid($childpid,0);
287 my ($first_line,@report) = <$spamc_out>;
289 print STDERR "[$?;".($? >> 8)."] ";
290 print STDERR $first_line,@report;
293 if (defined $first_line) {
295 ($score,$threshold) = $first_line =~ m{^(-?[\d\.]+)/(-?[\d\.]+)$};
296 $report = join('',@report);
299 $SIG{"PIPE"} = $old_sig;
302 carp "processing of message failed [$@]\n";
305 return wantarray?($score,$is_spam,$report):$score;
309 my ($sub,$bug_num) = @_;
310 my $log = Debbugs::Log->new(bug_num => $bug_num) or
311 die "Unable to open bug log for $bug_num";
313 while (my $record = $log->read_record()) {
314 next if $record->{type} eq 'html';
315 next if $record->{type} eq 'autocheck';
316 my ($msg_id) = record_regex($record,
317 qr/^Message-Id:\s+<(.+)>/mi);
318 next unless defined $msg_id;
319 if ($msg_id =~ /$config{email_domain}$/) {
320 print STDERR "skipping $msg_id\n" if $DEBUG;
323 print STDERR "examining $msg_id: " if $DEBUG;
324 if ($seen_msgids{$msg_id}) {
325 print STDERR "already seen\n" if $DEBUG;
328 $seen_msgids{$msg_id}=1;
329 $sub->($bug_num,$record,$msg_id);
337 # cperl-indent-level: 4
338 # indent-tabs-mode: nil