From cea6c94fcc191edace23d2f0377021a3812f3b38 Mon Sep 17 00:00:00 2001
From: Don Armstrong
Date: Thu, 23 Feb 2023 16:48:28 -0800
Subject: [PATCH] check to make sure the pdf is good before processing

---
 scanner_workflow.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/scanner_workflow.py b/scanner_workflow.py
index 787e34e..64514f2 100755
--- a/scanner_workflow.py
+++ b/scanner_workflow.py
@@ -9,6 +9,7 @@ from filelock import Timeout, FileLock
 import subprocess
 from logging import error, info, debug, warning
 import re
+from time import sleep
 
 
 class ScannerWorkflowEvent(FileSystemEventHandler):
@@ -98,6 +99,20 @@ class ScannerWorkflow:
         """Process a single PDF."""
         pdf_file = Path(pdf_file)
         orig_pdf = pdf_file
+        # check that the pdf is good, otherwise wait to see if it
+        # might become good
+        pdf_good = False
+        for i in range(1, 10):
+            check = subprocess.run(["qpdf", "--check", pdf_file])
+            if check.returncode == 0:
+                pdf_good = True
+                break
+            # sleep for 10 seconds if the PDF was bad
+            sleep(10)
+        if not pdf_good:
+            error(f"PDF was not good, skipping {orig_pdf} for now")
+            return
+
         # move to the processing directory
         output_path = self.pdf_file_path(pdf_file.name)
         pdf_file = pdf_file.rename(
-- 
2.39.2