git.donarmstrong.com Git - scanner_workflow.git/commitdiff
check to make sure the pdf is good before processing
author: Don Armstrong <don@donarmstrong.com>
Fri, 24 Feb 2023 00:48:28 +0000 (16:48 -0800)
committer: Don Armstrong <don@donarmstrong.com>
Fri, 24 Feb 2023 00:48:28 +0000 (16:48 -0800)
scanner_workflow.py

index 787e34e593559320f774d925a4438eb3a589d398..64514f20e56a392c067e0a1764aff1c358323a87 100755 (executable)
@@ -9,6 +9,7 @@ from filelock import Timeout, FileLock
 import subprocess
 from logging import error, info, debug, warning
 import re
+from time import sleep
 
 
 class ScannerWorkflowEvent(FileSystemEventHandler):
@@ -98,6 +99,20 @@ class ScannerWorkflow:
         """Process a single PDF."""
         pdf_file = Path(pdf_file)
         orig_pdf = pdf_file
+        # check that the pdf is good, otherwise wait to see if it
+        # might become good
+        pdf_good = False
+        for i in range(1, 10):
+            check = subprocess.run(["qpdf", "--check", pdf_file])
+            if check.returncode == 0:
+                pdf_good = True
+                break
+            # sleep for 10 seconds if the PDF was bad
+            sleep(10)
+        if not pdf_good:
+            error(f"PDF was not good, skipping {orig_pdf} for now")
+            return
+
         # move to the processing directory
         output_path = self.pdf_file_path(pdf_file.name)
         pdf_file = pdf_file.rename(