From: Don Armstrong
Date: Fri, 24 Feb 2023 00:48:28 +0000 (-0800)
Subject: check to make sure the pdf is good before processing
X-Git-Url: https://git.donarmstrong.com/?p=scanner_workflow.git;a=commitdiff_plain;h=cea6c94fcc191edace23d2f0377021a3812f3b38

check to make sure the pdf is good before processing
---

diff --git a/scanner_workflow.py b/scanner_workflow.py
index 787e34e..64514f2 100755
--- a/scanner_workflow.py
+++ b/scanner_workflow.py
@@ -9,6 +9,7 @@ from filelock import Timeout, FileLock
 import subprocess
 from logging import error, info, debug, warning
 import re
+from time import sleep
 
 
 class ScannerWorkflowEvent(FileSystemEventHandler):
@@ -98,6 +99,20 @@ class ScannerWorkflow:
         """Process a single PDF."""
         pdf_file = Path(pdf_file)
         orig_pdf = pdf_file
+        # check that the pdf is good, otherwise wait to see if it
+        # might become good
+        pdf_good = False
+        for i in range(1, 10):
+            check = subprocess.run(["qpdf", "--check", pdf_file])
+            if check.returncode == 0:
+                pdf_good = True
+                break
+            # sleep for 10 seconds if the PDF was bad
+            sleep(10)
+        if not pdf_good:
+            error(f"PDF was not good, skipping {orig_pdf} for now")
+            return
+
         # move to the processing directory
         output_path = self.pdf_file_path(pdf_file.name)
         pdf_file = pdf_file.rename(