2 # debbugs-spam is part of debbugs, and is released
3 # under the terms of the GPL version 2, or any later version, at your
4 # option. See the file README and COPYING for more information.
5 # Copyright 2012 by Don Armstrong <don@donarmstrong.com>.
11 use Getopt::Long qw(:config no_ignore_case);
16 debbugs-spam -- Scan log files for spam and populate nnn.log.spam
20 debbugs-spam [options] bugnumber [[bugnumber2]..]
23 --spool-dir debbugs spool directory
24 --debug, -d debugging level (Default 0)
25 --help, -h display this help
26 --man, -m display manual
34 Debbugs spool directory; defaults to the value configured in the
35 debbugs configuration file.
43 Display brief useage information.
57 Automatically scan messages using spamassassin and mark messages as
58 spam which hit the threshold, and those that are highly negative as
63 Output the score of all of the messages in a bug
67 =item B<--skip-seen> Skip messages which have previously been classified
73 Mark messages as spam if there is a regex match to subject or message
78 Mark messages as ham if there is a regex match to subject or message
92 use Debbugs::Log qw(record_regex);
93 use Debbugs::Log::Spam;
94 use Debbugs::Config qw(:config);
95 use Debbugs::Command qw(:all);
96 use IPC::Open3 qw(open3);
110 handle_main_arguments(\%options,
115 'spamc_opts|spamc-opts=s@' => 0,
116 'spool_dir|spool-dir=s',
117 'debug|d+','help|h|?','man|m');
120 ('auto-scan' => {function => \&auto_spamscan,
121 arguments => {'ham_threshold|ham-threshold=s' => 0,
123 defaults => {ham_threshold => -5},
125 'score' => {function => \&score_bug,
126 arguments => {'skip_seen|skip-seen!' => 0
129 'mark-spam' => {function => \&mark_spam,
131 'mark-ham' => {function => \&mark_ham,
133 'help' => {function => sub {pod2usage({verbose => 2});}}
136 pod2usage() if $options{help};
137 pod2usage({verbose=>2}) if $options{man};
139 $DEBUG = $options{debug};
142 $options{verbose} = $options{verbose} - $options{quiet};
144 pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
146 my ($subcommand) = shift @ARGV;
147 if (not defined $subcommand) {
148 $subcommand = 'help';
149 print STDERR "You must provide a subcommand; displaying usage.\n";
151 } elsif (not exists $subcommands{$subcommand}) {
152 print STDERR "$subcommand is not a valid subcommand; displaying usage.\n";
156 if (exists $options{spool_dir} and defined $options{spool_dir}) {
157 $config{spool_dir} = $options{spool_dir};
159 if ($subcommand ne 'help') {
160 chdir($config{spool_dir}) or die "chdir $config{spool_dir} failed: $!";
163 handle_subcommand_arguments(\@ARGV,
164 $subcommands{$subcommand}{arguments},
165 $subcommands{$subcommand}{defaults},
167 $subcommands{$subcommand}{function}->(\%options,$opts,\%config,\@ARGV);
179 my ($spam_ham,$options,$opts,$config,$argv) = @_;
180 my $regex = shift @{$argv};
181 for my $bug_num (@{$argv}) {
182 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
183 die "Unable to open bug log spam for $bug_num";
185 my ($bn,$rec,$mid) = @_;
186 my $body = $rec->{text};
187 my ($subject) = $body =~ /^Subject: *(.+)$/mi;
189 if ($subject =~ /\Q$regex\E/) {
192 if ($mid =~ /\Q$regex\E/) {
196 print STDERR "it's a match" if $DEBUG;
197 if ($spam_ham eq 'spam') {
198 $spam->add_spam($mid);
200 $spam->add_ham($mid);
211 my ($options,$opts,$config,$argv) = @_;
212 for my $bug_num (@{$argv}) {
214 spam_score_bug($bug_num,
216 $options->{spamc_opts},
219 print "$_->{score} $_->{message_id} $_->{subject}\n"
225 my ($options,$opts,$config,$argv) = @_;
227 for my $bug_num (@{$argv}) {
228 my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
229 die "Unable to open bug log spam for $bug_num";
231 my ($bn,$rec,$mid) = @_;
232 if ($spam->is_spam($mid)) {
233 print STDERR "already spam\n" if $DEBUG;
236 if ($spam->is_ham($mid)) {
237 print STDERR "already ham\n" if $DEBUG;
240 my ($score,$is_spam,$report,$threshold) =
243 $options->{spamc_opts},
246 print STDERR "it's spam ($score)\n" if $DEBUG;
247 $spam->add_spam($mid);
248 } elsif ($score < $opts->{ham_threshold}) {
249 print STDERR "it's really ham ($score)\n" if $DEBUG;
250 $spam->add_ham($mid);
253 print STDERR "it's ham ($score)\n" if $DEBUG;
263 my ($bug,$spamc,$spamc_opts,$skip_seen) = @_;
267 $spam = Debbugs::Log::Spam->new(bug_num => $bug) or
268 die "Unable to open bug log spam for $bug";
272 my ($bn,$rec,$mid) = @_;
275 if ($spam->is_spam($mid)) {
277 } elsif ($spam->is_ham($mid)) {
282 spam_score($rec,$spamc,$spamc_opts);
283 my ($subject) = $rec->{text} =~ /^Subject: *(.+)$/mi;
296 my ($record,$spamc,$spamc_opts) = @_;
297 my ($score,$threshold,$report);
300 my ($spamc_in,$spamc_out);
301 my $old_sig = $SIG{"PIPE"};
303 die "SIGPIPE in child for some reason";
306 open3($spamc_in,$spamc_out,0,
307 $spamc,'-E',@{$spamc_opts}) or
308 die "Unable to fork spamc: $!";
310 die "Unable to fork spamc";
312 print {$spamc_in} $record->{text};
313 close($spamc_in) or die "Unable to close spamc_in: $!";
314 waitpid($childpid,0);
318 my ($first_line,@report) = <$spamc_out>;
320 print STDERR "[$?;".($? >> 8)."] ";
321 print STDERR $first_line,@report;
324 if (defined $first_line) {
326 ($score,$threshold) = $first_line =~ m{^(-?[\d\.]+)/(-?[\d\.]+)$};
327 $report = join('',@report);
330 $SIG{"PIPE"} = $old_sig;
333 carp "processing of message failed [$@]\n";
336 return wantarray?($score,$is_spam,$report):$score;
340 my ($sub,$bug_num) = @_;
341 my $log = Debbugs::Log->new(bug_num => $bug_num) or
342 die "Unable to open bug log for $bug_num";
344 while (my $record = $log->read_record()) {
345 next if $record->{type} eq 'html';
346 next if $record->{type} eq 'autocheck';
347 my ($msg_id) = record_regex($record,
348 qr/^Message-Id:\s+<(.+)>/mi);
349 next unless defined $msg_id;
350 print STDERR "examining $msg_id: " if $DEBUG;
351 if ($msg_id =~ /$config{email_domain}$/) {
352 print STDERR "skipping\n" if $DEBUG;
355 if ($seen_msgids{$msg_id}) {
356 print STDERR "already seen\n" if $DEBUG;
359 $seen_msgids{$msg_id}=1;
360 $sub->($bug_num,$record,$msg_id);
361 print STDERR "\n" if $DEBUG;
369 # cperl-indent-level: 4
370 # indent-tabs-mode: nil