]> git.donarmstrong.com Git - debbugs.git/blobdiff - bin/debbugs-spam
add skip-missing argument to learn
[debbugs.git] / bin / debbugs-spam
index f416fb339386bf0a5c3e281ff59d025ce9f9930b..d8abcc6780abe12feb2eae479eb3e4b7279e4c72 100755 (executable)
@@ -78,6 +78,10 @@ id
 Mark messages as ham if there is a regex match to subject or message
 id
 
+=item B<learn>
+
+Learn from messages which have been marked as spam/ham
+
 =back
 
 
@@ -93,6 +97,7 @@ use Debbugs::Log qw(record_regex);
 use Debbugs::Log::Spam;
 use Debbugs::Config qw(:config);
 use Debbugs::Command qw(:all);
+use Debbugs::Common qw(getbuglocation);
 use IPC::Open3 qw(open3);
 use Carp;
 
@@ -102,25 +107,28 @@ my %options =
      man     => 0,
      verbose => 0,
      quiet   => 0,
-     quick   => 0,
      spamc   => 'spamc',
      spamc_opts => [],
     );
 
 handle_main_arguments(\%options,
-                      'quick|q',
                       'service|s',
                       'sysconfdir|c',
                       'spamc=s' => 0,
                       'spamc_opts|spamc-opts=s@' => 0,
                       'spool_dir|spool-dir=s',
+                      'quiet|q:+',
+                      'verbose|v:+',
                       'debug|d+','help|h|?','man|m');
 
 my %subcommands =
     ('auto-scan' => {function => \&auto_spamscan,
                      arguments => {'ham_threshold|ham-threshold=s' => 0,
+                                   'skip_missing|skip-missing!' => 0,
                                   },
-                     defaults => {ham_threshold => -5},
+                     defaults => {ham_threshold => -5,
+                                  skip_missing => 0,
+                                 },
                     },
      'score' => {function => \&score_bug,
                  arguments => {'skip_seen|skip-seen!' => 0
@@ -130,6 +138,12 @@ my %subcommands =
                     },
      'mark-ham' => {function => \&mark_ham,
                    },
+     'learn' => {function => \&learn,
+                 arguments => {'skip_missing|skip-missing!' => 0,
+                              },
+                 defaults => {skip_missing => 0,
+                             },
+                },
      'help' => {function => sub {pod2usage({verbose => 2});}}
     );
 
@@ -192,14 +206,18 @@ sub mark_it {
                        if ($mid =~ /\Q$regex\E/) {
                            $is_match = 1;
                        }
-                       if ($spam_ham eq 'spam') {
-                           $spam->add_spam($mid);
-                       } else {
-                           $spam->add_ham($mid);
+                       if ($is_match) {
+                           print STDERR "it's a match" if $DEBUG;
+                           if ($spam_ham eq 'spam') {
+                               $spam->add_spam($mid);
+                           } else {
+                               $spam->add_ham($mid);
+                           }
                        }
                    },
                    $bug_num
                   );
+        $spam->save();
     }
 }
 
@@ -218,10 +236,43 @@ sub score_bug {
     }
 }
 
+# learn($options,$opts,$config,$argv)
+#
+# For each bug number in @{$argv}, walk its messages with foreachmsg and
+# hand every message that the bug's Debbugs::Log::Spam object reports as
+# spam or ham to spamc_learn_spam / spamc_learn_ham respectively.
+# Messages reported as neither are left alone. $spam->save() is called
+# once per bug after the walk.
+#
+# When $opts->{skip_missing} is set, a bug for which
+# getbuglocation($bug_num,'log') is undefined is skipped; a note is
+# printed to STDERR when $options->{verbose} is greater than -1.
+#
+# Dies if the Debbugs::Log::Spam object cannot be created.
+sub learn {
+    my ($options,$opts,$config,$argv) = @_;
+
+    for my $bug_num (@{$argv}) {
+        if ($opts->{skip_missing} && !defined getbuglocation($bug_num,'log')) {
+            $options->{verbose} > -1
+                and print STDERR "bug $bug_num does not exist\n";
+            next;
+        }
+        my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num);
+        die "Unable to open bug log spam for $bug_num" unless $spam;
+        # the spamc program and its options are the same for every message
+        my @spamc_args = @{$options}{qw(spamc spamc_opts)};
+        foreachmsg(sub {
+                       my (undef,$record,$msg_id) = @_;
+                       if ($spam->is_spam($msg_id)) {
+                           spamc_learn_spam($record,@spamc_args);
+                           return;
+                       }
+                       if ($spam->is_ham($msg_id)) {
+                           spamc_learn_ham($record,@spamc_args);
+                           return;
+                       }
+                   },
+                   $bug_num,
+                  );
+        $spam->save();
+    }
+}
+
 sub auto_spamscan {
     my ($options,$opts,$config,$argv) = @_;
 
     for my $bug_num (@{$argv}) {
+        if ($opts->{skip_missing} and
+            not defined getbuglocation($bug_num,'log')) {
+            print STDERR "bug $bug_num does not exist\n" if $options->{verbose} > -1;
+            next;
+        }
         my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
             die "Unable to open bug log spam for $bug_num";
         foreachmsg(sub {
@@ -289,10 +340,11 @@ sub spam_score_bug {
     return @records;
 }
 
-sub spam_score {
+sub spamc_bug {
     my ($record,$spamc,$spamc_opts) = @_;
-    my ($score,$threshold,$report);
-    my $is_spam = 0;
+    my $first_line = '';
+    my $report = '';
+    my $exit_code = 0;
     eval {
         my ($spamc_in,$spamc_out);
         my $old_sig = $SIG{"PIPE"};
@@ -301,7 +353,7 @@ sub spam_score {
         };
         my $childpid =
             open3($spamc_in,$spamc_out,0,
-                  $spamc,'-E',@{$spamc_opts}) or
+                  $spamc,@{$spamc_opts}) or
                       die "Unable to fork spamc: $!";
         if (not $childpid) {
             die "Unable to fork spamc";
@@ -310,19 +362,10 @@ sub spam_score {
         close($spamc_in) or die "Unable to close spamc_in: $!";
         waitpid($childpid,0);
         if ($? >> 8) {
-            $is_spam = 1;
-        }
-        my ($first_line,@report) = <$spamc_out>;
-        if ($DEBUG) {
-            print STDERR "[$?;".($? >> 8)."] ";
-            print STDERR $first_line,@report;
-            print STDERR " ";
-        }
-        if (defined $first_line) {
-            chomp $first_line;
-            ($score,$threshold) = $first_line =~ m{^(-?[\d\.]+)/(-?[\d\.]+)$};
-            $report = join('',@report);
+            $exit_code = $? >> 8;
         }
+        local $/;
+        $report = <$spamc_out>;
         close($spamc_out);
         $SIG{"PIPE"} = $old_sig;
     };
@@ -330,7 +373,35 @@ sub spam_score {
         carp "processing of message failed [$@]\n";
         return undef;
     }
-    return wantarray?($score,$is_spam,$report):$score;
+    return ($exit_code,$report);
+}
+
+# spam_score($record,$spamc,$spamc_opts)
+#
+# Call spamc_bug on $record with '-c' appended to @{$spamc_opts} and
+# strip the leading "score/threshold" line from the output it returns.
+#
+# In list context returns ($score,$exit_code,$report), where $report is
+# the output that remained after the score line was removed; in scalar
+# context returns $score alone. $score is undef when spamc_bug returned
+# no output or the output did not begin with a score line. The
+# threshold is matched but not returned.
+sub spam_score {
+    my ($record,$spamc,$spamc_opts) = @_;
+    my $score;
+    my ($exit_code,$report) =
+        spamc_bug($record,$spamc,[@{$spamc_opts},'-c']);
+    # s/// returns the number of substitutions made, never the captured
+    # groups, so the score has to be read from $1 after a successful
+    # substitution; assigning the result of s/// directly would leave
+    # $score as 1 for every message that matched.
+    if (defined $report and
+        $report =~ s{^(-?[\d\.]+)/(-?[\d\.]+)\n?}{}) {
+        $score = $1;
+    }
+    return wantarray?($score,$exit_code,$report):$score;
+}
+
+# Call spamc_learn with learn type 'ham'; the caller's arguments
+# ($record,$spamc,$spamc_opts) are passed through unchanged.
+sub spamc_learn_ham {
+    return spamc_learn(ham => @_);
+}
+
+# Call spamc_learn with learn type 'forget'; the caller's arguments
+# ($record,$spamc,$spamc_opts) are passed through unchanged.
+sub spamc_learn_forget {
+    return spamc_learn(forget => @_);
+}
+
+# Call spamc_learn with learn type 'spam'; the caller's arguments
+# ($record,$spamc,$spamc_opts) are passed through unchanged.
+sub spamc_learn_spam {
+    return spamc_learn(spam => @_);
+}
+
+# Hand $record to spamc_bug with '-L', $type appended to the caller's
+# @{$spamc_opts}. spamc_learn_ham, spamc_learn_forget and
+# spamc_learn_spam call this with $type 'ham', 'forget' and 'spam'.
+# Returns whatever spamc_bug returns.
+sub spamc_learn {
+    my ($type,$record,$spamc,$spamc_opts) = @_;
+    my @learn_opts = (@{$spamc_opts},'-L',$type);
+    return spamc_bug($record,$spamc,\@learn_opts);
 }
 
 sub foreachmsg {