import subprocess
from logging import error, info, debug, warning
import re
+from time import sleep
class ScannerWorkflowEvent(FileSystemEventHandler):
"""Process a single PDF."""
pdf_file = Path(pdf_file)
orig_pdf = pdf_file
+ # validate the PDF with "qpdf --check" (up to 9 attempts); a file that
+ # fails is re-checked after a pause in case it becomes valid later
+ pdf_good = False
+ for i in range(1, 10):
+ check = subprocess.run(["qpdf", "--check", pdf_file])
+ if check.returncode == 0:
+ pdf_good = True
+ break
+ # non-zero exit status from qpdf: pause 10 seconds (this also runs after the final attempt)
+ sleep(10)
+ if not pdf_good:
+ error(f"PDF was not good, skipping {orig_pdf} for now")
+ return
+
# move to the processing directory
output_path = self.pdf_file_path(pdf_file.name)
pdf_file = pdf_file.rename(
output_file = self.output_dir / output_path / pdf_file.name
res = subprocess.run(["ocrmypdf", *self.ocrmypdf_opts, pdf_file, output_file])
if res.returncode != 0:
- error(f"Unable to properly OCR pdf: {res.stdout} {res.stderr}")
+ error(
+ f"Unable to properly OCR pdf {orig_pdf} into {output_file}: {res.stdout} {res.stderr}"
+ )
return
pdf_file.unlink()
info(f"Processed {orig_pdf} into {output_file}")