git.donarmstrong.com Git - scanner_workflow.git/blobdiff - scanner_workflow.py
add annotations to ScannerWorkflowEvent
[scanner_workflow.git] / scanner_workflow.py
index 64514f20e56a392c067e0a1764aff1c358323a87..ab7b907c877007c63fde67af1e261708aef053c5 100755 (executable)
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import click
 from watchdog.observers import Observer
 from watchdog.events import FileSystemEventHandler, FileSystemEvent
@@ -15,13 +17,13 @@ from time import sleep
 class ScannerWorkflowEvent(FileSystemEventHandler):
     """Subclass of FileSystemEventHandler to handle OCRing PDFs"""
 
-    scanner_workflow = None
+    scanner_workflow: ScannerWorkflow
 
-    def __init__(self, scanner_workflow=None):
+    def __init__(self, scanner_workflow: ScannerWorkflow):
         super().__init__()
         self.scanner_workflow = scanner_workflow
         if not self.scanner_workflow:
-            raise Error("No scanner_workflow passed to ScannerWorkflowEvent")
+            raise Exception("No scanner_workflow passed to ScannerWorkflowEvent")
 
     def on_any_event(self, event: FileSystemEvent):
         if event.is_directory:
@@ -107,8 +109,16 @@ class ScannerWorkflow:
             if check.returncode == 0:
                 pdf_good = True
                 break
-            # sleep for 10 seconds if the PDF was bad
-            sleep(10)
+            file_size = pdf_file.stat().st_size
+            # sleep in a loop for 10 seconds if the file size is still
+            # increasing
+            while True:
+                sleep(10)
+                new_size = pdf_file.stat().st_size
+                if new_size > file_size:
+                    file_size = new_size
+                else:
+                    break
         if not pdf_good:
             error(f"PDF was not good, skipping {orig_pdf} for now")
             return