]> git.donarmstrong.com Git - debbugs.git/blobdiff - bin/debbugs-spam
add learn subcommand and examples
[debbugs.git] / bin / debbugs-spam
index 93051182c87e6818d9c29fa97021497225dce4f3..ba026da4547af6a1ac3b79d07df1e9af9c5b940f 100755 (executable)
@@ -78,11 +78,25 @@ id
 Mark messages as ham if there is a regex match to subject or message
 id
 
+=item B<learn>
+
+Train the spam filter (via C<spamc -L>) on messages which are already
+marked as ham or spam
+
 =back
 
 
 =head1 EXAMPLES
 
+Start spamd:
+
+  /usr/sbin/spamd --socketpath=/home/debbugs/spamd_socket \
+      --nouser-config --cf='include /home/debbugs/.spamassassin/user_prefs' \
+      --cf='allow_user_rules 1' --allow-tell;
+
+Then score bugs:
+
+  debbugs-spam --spamc-opts '-U' --spamc-opts '/home/debbugs/spamd_socket' \
+      score 859123;
 
 =cut
 
@@ -118,8 +132,9 @@ handle_main_arguments(\%options,
 
 my %subcommands =
     ('auto-scan' => {function => \&auto_spamscan,
-                     arguments => {'ham_threshold=s' => -5,
+                     arguments => {'ham_threshold|ham-threshold=s' => 0,
                                   },
+                     defaults => {ham_threshold => -5},
                     },
      'score' => {function => \&score_bug,
                  arguments => {'skip_seen|skip-seen!' => 0
@@ -129,6 +144,8 @@ my %subcommands =
                     },
      'mark-ham' => {function => \&mark_ham,
                    },
+     'learn' => {function => \&learn,
+                }, # key must be 'function'; the dispatcher calls ->{function}
      'help' => {function => sub {pod2usage({verbose => 2});}}
     );
 
@@ -159,7 +176,10 @@ if ($subcommand ne 'help') {
     chdir($config{spool_dir}) or die "chdir $config{spool_dir} failed: $!";
 }
 my $opts =
-    handle_subcommand_arguments(\@ARGV,$subcommands{$subcommand}{arguments});
+    handle_subcommand_arguments(\@ARGV,
+                                $subcommands{$subcommand}{arguments},
+                                $subcommands{$subcommand}{defaults},
+                               );
 $subcommands{$subcommand}{function}->(\%options,$opts,\%config,\@ARGV);
 
 
@@ -188,11 +208,49 @@ sub mark_it {
                        if ($mid =~ /\Q$regex\E/) {
                            $is_match = 1;
                        }
-                       if ($spam_ham eq 'spam') {
-                           $spam->add_spam($mid);
+                       if ($is_match) {
+                           print STDERR "it's a match" if $DEBUG;
+                           if ($spam_ham eq 'spam') {
+                               $spam->add_spam($mid);
+                           } else {
+                               $spam->add_ham($mid);
+                           }
+                       }
+                   },
+                   $bug_num
+                  );
+        $spam->save();
+    }
+}
+
+sub learn {
+    my ($options,$opts,$config,$argv) = @_;
+    for my $bug_num (@{$argv}) {
+        my $spam = Debbugs::Log::Spam->new(bug_num => $bug_num) or
+            die "Unable to open bug log spam for $bug_num";
+        foreachmsg(sub {
+                       my ($bn,$rec,$mid) = @_;
+                       my $score;
+                       if ($spam->is_spam($mid)) {
+                           $score //=
+                               spam_score($rec,$options->{spamc},
+                                          [@{$options->{spamc_opts}},
+                                           '-L','spam'
+                                          ]
+                                         );
+                           print STDERR "learning spam" if $DEBUG;
+                       } elsif ($spam->is_ham($mid)) {
+                           $score //=
+                               spam_score($rec,$options->{spamc},
+                                          [@{$options->{spamc_opts}},
+                                           '-L','ham'
+                                          ]
+                                         );
+                           print STDERR "learning ham" if $DEBUG;
                        } else {
-                           $spam->add_ham($mid);
+                           print STDERR "not learning" if $DEBUG;
                        }
+                       print STDERR " from $mid" if $DEBUG;
                    },
                    $bug_num
                   );
@@ -238,7 +296,7 @@ sub auto_spamscan {
                        if ($is_spam) {
                            print STDERR "it's spam ($score)\n" if $DEBUG;
                            $spam->add_spam($mid);
-                       } elsif ($score < $options->{ham_threshold}) {
+                       } elsif ($score < $opts->{ham_threshold}) {
                            print STDERR "it's really ham ($score)\n" if $DEBUG;
                            $spam->add_ham($mid);
                        }
@@ -340,17 +398,18 @@ sub foreachmsg {
         my ($msg_id) = record_regex($record,
                                     qr/^Message-Id:\s+<(.+)>/mi);
         next unless defined $msg_id;
+        print STDERR "examining $msg_id: " if $DEBUG;
         if ($msg_id =~ /$config{email_domain}$/) {
-            print STDERR "skipping $msg_id\n" if $DEBUG;
+            print STDERR "skipping\n" if $DEBUG;
             next;
         }
-        print STDERR "examining $msg_id: " if $DEBUG;
         if ($seen_msgids{$msg_id}) {
             print STDERR "already seen\n" if $DEBUG;
             next;
         }
         $seen_msgids{$msg_id}=1;
         $sub->($bug_num,$record,$msg_id);
+        print STDERR "\n" if $DEBUG;
     }
 }