git.donarmstrong.com Git - bin.git/blobdiff - learn_from_spam
abstract out run through spam
[bin.git] / learn_from_spam
index aca7f219858159fa65646e90d6b8a074b5aa73a3..24db173bdab61b8229c05d05085c546420a89fa9 100755 (executable)
@@ -6,31 +6,42 @@ HAM_REPORT="spamassassin --revoke"
 SPAM_CHECK="spamassassin -e"
 
 # these are for spamc
-SPAM_REPORT="spamc --learntype=spam"
-HAM_REPORT="spamc --learntype=ham"
-SPAM_CHECK="spamc -c"
+SPAM_REPORT="spamc --learntype=spam -s $((100 * 1024 * 1024))"
+HAM_REPORT="spamc --learntype=ham -s $((100 * 1024 * 1024))"
+SPAM_CHECK="spamc -s $((100 * 1024 * 1024)) -c"
 
 # needs rules directories
-NEEDS_SPAM_RULES=~/Maildir/spam/needs_rules/cur/
-NEEDS_HAM_RULES=~/Maildir/spam/needs_ham_rules/cur/
+NEEDS_SPAM_RULES="$(echo ~/Maildir/spam/needs_rules)"
+NEEDS_HAM_RULES="$(echo ~/Maildir/spam/needs_ham_rules)"
+
+run_through_spamc () {
+    # Filter message $2 through spamc and store the result in directory $1 (created if missing), minus any ",S=..." name suffix.
+    DESTDIR="$1"
+    FILENAME="$2"
+    mkdir -p "$DESTDIR"
+    TMP="$(mktemp -d)"
+    DESTFILE="$DESTDIR/$(basename "$FILENAME")"
+    DESTFILE_NO_SIZE="$DESTDIR/$(basename "$FILENAME"|sed 's/,S=.*//g')"
+    spamc -s $((100 * 1024 * 1024)) < "$FILENAME" > "$TMP/file" 2>/dev/null;
+    mv "$TMP/file" "$DESTFILE"  # move the filtered copy into place under the message's original name
+    [ "$DESTFILE" = "$DESTFILE_NO_SIZE" ] || mv "$DESTFILE" "$DESTFILE_NO_SIZE"  # nothing to rename when the name has no ,S= suffix
+    trap - EXIT;
+    rmdir "$TMP";
+}
 
 if [ "$(basename $0)" = "learn_from_spam" ]; then
     for file in "$@"; do
         if ! formail -c < "$file"|grep -q '^List-Id'; then
             $SPAM_REPORT < "$file" >/dev/null 2>&1;
-        fi;
+        else
+            spamasssin --local --report < "$file" >/dev/null 2>&1;
+        fi
         formail -c < "$file" | grep -e '^From ' -e 'From: ' | spamassassin --add-to-blacklist >/dev/null 2>&1;
         formail -c < "$file" | grep -e '^From ' -e 'From: ' | spamassassin --remove-from-whitelist >/dev/null 2>&1;
-        # check to see if it's still spam
-        if ! $SPAM_CHECK < "$file" >/dev/null 2>&1; then
-            TMP="$(mktemp -d)"
-            trap "rm -f '$TMP'/file; rmdir '$TMP'" EXIT
-            # this message is still not spam; may need custom rules
-            spamc < "$file" > "$TMP/file" 2>/dev/null;
-            mv "$TMP/file" "${NEEDS_SPAM_RULES}$(basename "$file")";
-            trap - EXIT;
-            rmdir "$TMP";
-        elif [ "$file" = "${NEEDS_SPAM_RULES}$(basename "$file")" ]; then
+        # check to see if it's still ham
+        if $SPAM_CHECK < "$file" >/dev/null 2>&1; then
+            run_through_spamc "${NEEDS_SPAM_RULES}/cur" "$file";
+        elif [ "$(pwd)" = "${NEEDS_SPAM_RULES}" ]; then
             rm -f "$file";
         fi;
     done;
@@ -38,15 +49,9 @@ elif [ "$(basename $0)" = "learn_from_ham" ]; then
     for file in "$@"; do
         $HAM_REPORT < "$file" >/dev/null 2>&1;
         formail -c < "$file" |grep -e '^From ' -e 'From: ' -e 'To: ' -e 'Cc: '|spamassassin --add-to-whitelist >/dev/null 2>&1;
-        if $SPAM_CHECK < "$file" >/dev/null 2>&1; then
-            mkdir -p ~/Maildir/spam/needs_ham_rules/cur;
-            TMP="$(mktemp -d)"
-            trap "rm -f '$TMP'/file; rmdir '$TMP'" EXIT
-            spamc < "$file" > "$TMP/file" 2>/dev/null;
-            mv "$TMP/file" "${NEEDS_HAM_RULES}$(basename "$file")";
-            trap - EXIT;
-            rmdir "$TMP";
-        elif [ "$file" = "${NEEDS_HAM_RULES}$(basename "$file")" ]; then
+        if ! $SPAM_CHECK < "$file" >/dev/null 2>&1; then  # $SPAM_CHECK still fails after learning: keep a spamc-filtered copy under needs_ham_rules/cur
+            run_through_spamc "${NEEDS_HAM_RULES}/cur" "$file";
+        elif [ "$(pwd)" = "${NEEDS_HAM_RULES}" ]; then  # $SPAM_CHECK passes and we are running from the needs_ham_rules directory: drop the file
             rm -f "$file";
         fi;
     done;