git.donarmstrong.com Git - bin.git/blobdiff - learn_from_spam
abstract out run through spam
[bin.git] / learn_from_spam
index 88545f85dbb848ff9d714fa6b94f8fe29add8c1d..24db173bdab61b8229c05d05085c546420a89fa9 100755 (executable)
@@ -6,24 +6,54 @@ HAM_REPORT="spamassassin --revoke"
 SPAM_CHECK="spamassassin -e"
 
 # these are for spamc
-SPAM_REPORT="spamc --learntype=spam"
-HAM_REPORT="spamc --learntype=ham"
-SPAM_CHECK="spamc -c"
+# every spamc call passes -s so that messages of up to 100 MiB are still
+# handed to spamd (see spamc(1) for the default limit)
+SPAM_REPORT="spamc --learntype=spam -s $((1024 * 1024 * 100))"
+HAM_REPORT="spamc --learntype=ham -s $((1024 * 1024 * 100))"
+SPAM_CHECK="spamc -s $((1024 * 1024 * 100)) -c"
+
+# maildir folders that collect messages which are still misclassified
+# after learning and may need hand-written rules
+NEEDS_SPAM_RULES=~/Maildir/spam/needs_rules
+NEEDS_HAM_RULES=~/Maildir/spam/needs_ham_rules
+
+# Filter one message through spamc and store the result in a maildir folder.
+#
+# Arguments:
+#   $1 - destination directory; created (with parents) when missing
+#   $2 - path of the message file to filter
+# Globals written: DESTDIR, FILENAME, TMP, DESTFILE, DESTFILE_NO_SIZE
+# Outputs: a diagnostic on stderr when the message cannot be stored
+run_through_spamc () {
+    DESTDIR="$1"
+    FILENAME="$2"
+    mkdir -p -- "$DESTDIR"
+    TMP="$(mktemp -d)" || return 1
+    # drop the scratch copy if the script exits while we are working; the
+    # EXIT trap is reset again once the message has been filed
+    trap 'rm -rf -- "$TMP"' EXIT
+    DESTFILE="$DESTDIR/$(basename "$FILENAME")"
+    # same name with everything from ",S=" onwards cut off (presumably the
+    # maildir size hint, which no longer holds once spamc rewrote the mail)
+    DESTFILE_NO_SIZE="$DESTDIR/$(basename "$FILENAME"|sed 's/,S=.*//g')"
+    spamc -s $((100 * 1024 * 1024)) < "$FILENAME" > "$TMP/file" 2>/dev/null;
+    # land the filtered copy under the original name first, so that a source
+    # file already living in $DESTDIR is replaced instead of duplicated
+    if mv -- "$TMP/file" "$DESTFILE"; then
+        # mv refuses to move a file onto itself, so only rename when the
+        # size suffix was actually present
+        if [ "$DESTFILE" != "$DESTFILE_NO_SIZE" ]; then
+            mv -- "$DESTFILE" "$DESTFILE_NO_SIZE"
+        fi
+    else
+        echo "run_through_spamc: could not store $FILENAME in $DESTDIR" >&2
+    fi
+    trap - EXIT;
+    rm -rf -- "$TMP";
+}
 
 if [ "$(basename $0)" = "learn_from_spam" ]; then
     for file in "$@"; do
-        $SPAM_REPORT < "$file" >/dev/null;
-        grep -e '^From ' -e 'From: ' "$file" | spamassassin -R >/dev/null;
-        # check to see if it's still spam
-        if ! spamc -c < "$file"; then
-            # this message is still not spam; may need custom rules
-            cp "$file" ~/Maildir/spam/needs_rules/cur/;
+        # messages without a List-Id header go through $SPAM_REPORT; list
+        # mail is handed to "spamassassin --local --report" instead
+        if ! formail -c < "$file"|grep -q '^List-Id'; then
+            $SPAM_REPORT < "$file" >/dev/null 2>&1;
+        else
+            # all output is discarded here, so a misspelled command name
+            # fails silently -- keep this spelled "spamassassin"
+            spamassassin --local --report < "$file" >/dev/null 2>&1;
+        fi
+        # pass the From lines to spamassassin twice: once to blacklist the
+        # sender, once to drop it from the whitelist
+        formail -c < "$file" | grep -e '^From ' -e 'From: ' | spamassassin --add-to-blacklist >/dev/null 2>&1;
+        formail -c < "$file" | grep -e '^From ' -e 'From: ' | spamassassin --remove-from-whitelist >/dev/null 2>&1;
+        # check to see if it's still ham
+        if $SPAM_CHECK < "$file" >/dev/null 2>&1; then
+            run_through_spamc "${NEEDS_SPAM_RULES}/cur" "$file";
+        elif [ "$(pwd)" = "${NEEDS_SPAM_RULES}" ]; then
+            rm -f "$file";
         fi;
     done;
 elif [ "$(basename $0)" = "learn_from_ham" ]; then
     for file in "$@"; do
-        $HAM_REPORT < "$file" >/dev/null;
-        grep -e '^From ' -e 'From: ' -e 'To: ' -e 'Cc: ' "$file"|spamassassin -W>/dev/null;
+        $HAM_REPORT < "$file" >/dev/null 2>&1;
+        formail -c < "$file" |grep -e '^From ' -e 'From: ' -e 'To: ' -e 'Cc: '|spamassassin --add-to-whitelist >/dev/null 2>&1;
+        if ! $SPAM_CHECK < "$file" >/dev/null 2>&1; then
+            run_through_spamc "${NEEDS_HAM_RULES}/cur" "$file";
+        elif [ "$(pwd)" = "${NEEDS_HAM_RULES}" ]; then
+            rm -f "$file";
+        fi;
     done;
 else
     "Called in a way this script cannot recognize";