2 # debbugs-spam is part of debbugs, and is released
3 # under the terms of the GPL version 2, or any later version, at your
4 # option. See the file README and COPYING for more information.
5 # Copyright 2012 by Don Armstrong <don@donarmstrong.com>.
11 use Getopt::Long qw(:config no_ignore_case);
16 debbugs-spam -- Scan log files for spam and populate nnn.log.spam
20 debbugs-spam [options] bugnumber [[bugnumber2]..]
23 --spool-dir debbugs spool directory
24 --debug, -d debugging level (Default 0)
25 --help, -h display this help
26 --man, -m display manual
34 Debbugs spool directory; defaults to the value configured in the
35 debbugs configuration file.
43 Display brief useage information.
57 Automatically scan messages using spamassassin and mark messages as
58 spam which hit the threshold, and those that are highly negative as
63 Output the score of all of the messages in a bug
67 Mark messages as spam if there is a regex match to subject or message
72 Mark messages as ham if there is a regex match to subject or message
86 use Debbugs::Log qw(record_regex);
87 use Debbugs::Log::Spam;
88 use Debbugs::Config qw(:config);
89 use Debbugs::Command qw(:all);
90 use IPC::Open3 qw(open3);
104 handle_main_arguments(\%options,
109 'spamc_opts|spamc-opts=s@' => 0,
110 'spool_dir|spool-dir=s',
111 'debug|d+','help|h|?','man|m');
114 ('auto-scan' => {function => \&auto_spamscan,
115 arguments => {'ham_threshold=s' => -5,
118 'score' => {function => \&score_bug,
120 'mark-spam' => {function => \&mark_spam,
122 'mark-ham' => {function => \&mark_ham,
124 'help' => {function => sub {pod2usage({verbose => 2});}}
127 pod2usage() if $options{help};
128 pod2usage({verbose=>2}) if $options{man};
130 $DEBUG = $options{debug};
133 $options{verbose} = $options{verbose} - $options{quiet};
135 pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
137 my ($subcommand) = shift @ARGV;
138 if (not defined $subcommand) {
139 $subcommand = 'help';
140 print STDERR "You must provide a subcommand; displaying usage.\n";
142 } elsif (not exists $subcommands{$subcommand}) {
143 print STDERR "$subcommand is not a valid subcommand; displaying usage.\n";
147 if (exists $options{spool_dir} and defined $options{spool_dir}) {
148 $config{spool_dir} = $options{spool_dir};
150 if ($subcommand ne 'help') {
151 chdir($config{spool_dir}) or die "chdir $config{spool_dir} failed: $!";
154 handle_subcommand_arguments(\@ARGV,$subcommands{$subcommand}{arguments});
155 $subcommands{$subcommand}{function}->(\%options,$opts,\%config,\@ARGV);
167 my ($spam_ham,$options,$opts,$config,$argv) = @_;
168 my $regex = shift @{$argv};
169 for my $bug_num (@{$argv}) {
170 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
171 die "Unable to open bug log spam for $bug_num";
173 my ($bn,$rec,$mid) = @_;
174 my $body = $rec->{text};
175 my ($subject) = $body =~ /^Subject: *(.+)$/mi;
177 if ($subject =~ /\Q$regex\E/) {
180 if ($mid =~ /\Q$regex\E/) {
183 if ($spam_ham eq 'spam') {
184 $spam->add_spam($mid);
186 $spam->add_ham($mid);
196 my ($options,$opts,$config,$argv) = @_;
197 for my $bug_num (@{$argv}) {
199 spam_score_bug($bug_num,
201 $options->{spamc_opts});
202 print "$_->{score} $_->{message_id} $_->{subject}\n"
208 my ($options,$opts,$config,$argv) = @_;
210 for my $bug_num (@{$argv}) {
211 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
212 die "Unable to open bug log spam for $bug_num";
214 my ($bn,$rec,$mid) = @_;
215 if ($spam->is_spam($mid)) {
216 print STDERR "already spam\n" if $DEBUG;
219 if ($spam->is_ham($mid)) {
220 print STDERR "already ham\n" if $DEBUG;
223 my ($score,$is_spam,$report,$threshold) =
226 $options->{spamc_opts},
229 print STDERR "it's spam ($score)\n" if $DEBUG;
230 $spam->add_spam($mid);
231 } elsif ($score < $options->{ham_threshold}) {
232 print STDERR "it's really ham ($score)\n" if $DEBUG;
233 $spam->add_ham($mid);
236 print STDERR "it's ham ($score)\n" if $DEBUG;
246 my ($bug,$spamc,$spamc_opts) = @_;
250 my ($bn,$rec,$mid) = @_;
252 spam_score($rec,$spamc,$spamc_opts);
253 my ($subject) = $rec->{text} =~ /^Subject: *(.+)$/mi;
266 my ($record,$spamc,$spamc_opts) = @_;
267 my ($score,$threshold,$report);
270 my ($spamc_in,$spamc_out);
271 my $old_sig = $SIG{"PIPE"};
273 die "SIGPIPE in child for some reason";
276 open3($spamc_in,$spamc_out,0,
277 $spamc,'-E',@{$spamc_opts}) or
278 die "Unable to fork spamc: $!";
280 die "Unable to fork spamc";
282 print {$spamc_in} $record->{text};
283 close($spamc_in) or die "Unable to close spamc_in: $!";
284 waitpid($childpid,0);
288 my ($first_line,@report) = <$spamc_out>;
290 print STDERR "[$?;".($? >> 8)."] ";
291 print STDERR $first_line,@report;
294 if (defined $first_line) {
296 ($score,$threshold) = $first_line =~ m{^(-?[\d\.]+)/(-?[\d\.]+)$};
297 $report = join('',@report);
300 $SIG{"PIPE"} = $old_sig;
303 carp "processing of message failed [$@]\n";
306 return wantarray?($score,$is_spam,$report):$score;
310 my ($sub,$bug_num) = @_;
311 my $log = Debbugs::Log->new(bug_num => $bug_num) or
312 die "Unable to open bug log for $bug_num";
314 while (my $record = $log->read_record()) {
315 next if $record->{type} eq 'html';
316 next if $record->{type} eq 'autocheck';
317 my ($msg_id) = record_regex($record,
318 qr/^Message-Id:\s+<(.+)>/mi);
319 next unless defined $msg_id;
320 if ($msg_id =~ /$config{email_domain}$/) {
321 print STDERR "skipping $msg_id\n" if $DEBUG;
324 print STDERR "examining $msg_id: " if $DEBUG;
325 if ($seen_msgids{$msg_id}) {
326 print STDERR "already seen\n" if $DEBUG;
329 $seen_msgids{$msg_id}=1;
330 $sub->($bug_num,$record,$msg_id);
338 # cperl-indent-level: 4
339 # indent-tabs-mode: nil