git.donarmstrong.com Git - debbugs.git/blobdiff - bin/debbugs-spam
remove quick option and add quiet option
[debbugs.git] / bin / debbugs-spam
index 371fc572d8d3c99807d06b1f3e02f8ea8d6a8e23..613eec279f295ea5f8cbf7e70af9c44d10e65e95 100755 (executable)
@@ -62,6 +62,12 @@ ham.
 
 Output the score of all of the messages in a bug
 
+=over
+
+=item B<--skip-seen> Skip messages which have previously been classified
+
+=back
+
 =item B<mark-spam>
 
 Mark messages as spam if there is a regex match to subject or message
@@ -87,6 +93,7 @@ use Debbugs::Log qw(record_regex);
 use Debbugs::Log::Spam;
 use Debbugs::Config qw(:config);
 use Debbugs::Command qw(:all);
+use Debbugs::Common qw(getbuglocation);
 use IPC::Open3 qw(open3);
 use Carp;
 
@@ -96,26 +103,32 @@ my %options =
      man     => 0,
      verbose => 0,
      quiet   => 0,
-     quick   => 0,
      spamc   => 'spamc',
      spamc_opts => [],
     );
 
 handle_main_arguments(\%options,
-                      'quick|q',
                       'service|s',
                       'sysconfdir|c',
                       'spamc=s' => 0,
                       'spamc_opts|spamc-opts=s@' => 0,
                       'spool_dir|spool-dir=s',
+                      'quiet|q:+',
+                      'verbose|v:+',
                       'debug|d+','help|h|?','man|m');
 
 my %subcommands =
     ('auto-scan' => {function => \&auto_spamscan,
-                     arguments => {'ham_threshold=s' => -5,
+                     arguments => {'ham_threshold|ham-threshold=s' => 0,
+                                   'skip_missing|skip-missing!' => 0,
                                   },
+                     defaults => {ham_threshold => -5,
+                                  skip_missing => 0,
+                                 },
                     },
      'score' => {function => \&score_bug,
+                 arguments => {'skip_seen|skip-seen!' => 0
+                              },
                 },
      'mark-spam' => {function => \&mark_spam,
                     },
@@ -151,7 +164,10 @@ if ($subcommand ne 'help') {
     chdir($config{spool_dir}) or die "chdir $config{spool_dir} failed: $!";
 }
 my $opts =
-    handle_subcommand_arguments(\@ARGV,$subcommands{$subcommand}{arguments});
+    handle_subcommand_arguments(\@ARGV,
+                                $subcommands{$subcommand}{arguments},
+                                $subcommands{$subcommand}{defaults},
+                               );
 $subcommands{$subcommand}{function}->(\%options,$opts,\%config,\@ARGV);
 
 
@@ -180,14 +196,18 @@ sub mark_it {
                        if ($mid =~ /\Q$regex\E/) {
                            $is_match = 1;
                        }
-                       if ($spam_ham eq 'spam') {
-                           $spam->add_spam($mid);
-                       } else {
-                           $spam->add_ham($mid);
+                       if ($is_match) {
+                           print STDERR "it's a match" if $DEBUG;
+                           if ($spam_ham eq 'spam') {
+                               $spam->add_spam($mid);
+                           } else {
+                               $spam->add_ham($mid);
+                           }
                        }
                    },
                    $bug_num
                   );
+        $spam->save();
     }
 }
 
@@ -197,8 +217,10 @@ sub score_bug {
     for my $bug_num (@{$argv}) {
         my @bug_score =
             spam_score_bug($bug_num,
-                           $opts->{spamc},
-                           $opts->{spamc_opts});
+                           $options->{spamc},
+                           $options->{spamc_opts},
+                           $opts->{skip_seen},
+                          );
         print "$_->{score} $_->{message_id} $_->{subject}\n"
             foreach @bug_score;
     }
@@ -208,6 +230,11 @@ sub auto_spamscan {
     my ($options,$opts,$config,$argv) = @_;
 
     for my $bug_num (@{$argv}) {
+        if ($opts->{skip_missing} and
+            not defined getbuglocation($bug_num,'log')) {
+            print STDERR "bug $bug_num does not exist\n" if $options->{verbose} > -1;
+            next;
+        }
         my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
             die "Unable to open bug log spam for $bug_num";
         foreachmsg(sub {
@@ -228,7 +255,7 @@ sub auto_spamscan {
                        if ($is_spam) {
                            print STDERR "it's spam ($score)\n" if $DEBUG;
                            $spam->add_spam($mid);
-                       } elsif ($score < $options->{ham_threshold}) {
+                       } elsif ($score < $opts->{ham_threshold}) {
                            print STDERR "it's really ham ($score)\n" if $DEBUG;
                            $spam->add_ham($mid);
                        }
@@ -243,17 +270,31 @@ sub auto_spamscan {
 }
 
 sub spam_score_bug {
-    my ($bug,$spamc,$spamc_opts) = @_;
+    my ($bug,$spamc,$spamc_opts,$skip_seen) = @_;
 
+    my $spam;
+    if ($skip_seen) {
+        $spam = Debbugs::Log::Spam->new(bug_num => $bug) or
+            die "Unable to open bug log spam for $bug";
+    }
     my @records;
     foreachmsg(sub {
                    my ($bn,$rec,$mid) = @_;
-                   my $score =
+                   my $score;
+                   if ($skip_seen) {
+                       if ($spam->is_spam($mid)) {
+                           $score = 999;
+                       } elsif ($spam->is_ham($mid)) {
+                           $score = -999;
+                       }
+                   }
+                   $score //=
                        spam_score($rec,$spamc,$spamc_opts);
+                   my ($subject) = $rec->{text} =~ /^Subject: *(.+)$/mi;
                    push @records,
                       {message_id => $mid,
                        score => $score,
-                       subject => ($rec->{text} =~ /^Subject: *(.+)/i)[0],
+                       subject => $subject,
                       };
                },
                $bug
@@ -278,25 +319,25 @@ sub spam_score {
         if (not $childpid) {
             die "Unable to fork spamc";
         }
-        print {$spamc} $record->{text};
-        close($spamc) or die "Unable to close spamc: $!";
+        print {$spamc_in} $record->{text};
+        close($spamc_in) or die "Unable to close spamc_in: $!";
         waitpid($childpid,0);
-        if ($DEBUG) {
-            print STDERR "[$?;".($? >> 8)."] ";
-            print STDERR map {s/\n//; $_ } <$spamc_out>;
-            print STDERR " ";
-        }
-        close($spamc_out);
-        $SIG{"PIPE"} = $old_sig;
         if ($? >> 8) {
             $is_spam = 1;
         }
         my ($first_line,@report) = <$spamc_out>;
+        if ($DEBUG) {
+            print STDERR "[$?;".($? >> 8)."] ";
+            print STDERR $first_line,@report;
+            print STDERR " ";
+        }
         if (defined $first_line) {
             chomp $first_line;
             ($score,$threshold) = $first_line =~ m{^(-?[\d\.]+)/(-?[\d\.]+)$};
             $report = join('',@report);
         }
+        close($spamc_out);
+        $SIG{"PIPE"} = $old_sig;
     };
     if ($@) {
         carp "processing of message failed [$@]\n";
@@ -316,17 +357,18 @@ sub foreachmsg {
         my ($msg_id) = record_regex($record,
                                     qr/^Message-Id:\s+<(.+)>/mi);
         next unless defined $msg_id;
+        print STDERR "examining $msg_id: " if $DEBUG;
         if ($msg_id =~ /$config{email_domain}$/) {
-            print STDERR "skipping $msg_id\n" if $DEBUG;
+            print STDERR "skipping\n" if $DEBUG;
             next;
         }
-        print STDERR "examining $msg_id: " if $DEBUG;
         if ($seen_msgids{$msg_id}) {
             print STDERR "already seen\n" if $DEBUG;
             next;
         }
         $seen_msgids{$msg_id}=1;
         $sub->($bug_num,$record,$msg_id);
+        print STDERR "\n" if $DEBUG;
     }
 }