git.donarmstrong.com Git - scanner_workflow.git/commitdiff
rename files to follow iso8601
author Don Armstrong <don@donarmstrong.com>
Tue, 24 Jan 2023 06:10:55 +0000 (22:10 -0800)
committer Don Armstrong <don@donarmstrong.com>
Tue, 24 Jan 2023 06:10:55 +0000 (22:10 -0800)
scanner_workflow.py

index a65decc22b204f295f5e20b530fa066a99e05b9f..96346be86c859bdd4f51997d751dc9b3995fa940 100755 (executable)
@@ -8,6 +8,7 @@ from typing import Union
 from filelock import Timeout, FileLock
 import subprocess
 from logging import error, info, debug, warning
+import re
 
 
 class ScannerWorkflowEvent(FileSystemEventHandler):
@@ -67,11 +68,27 @@ class ScannerWorkflow:
         self.process_dir.mkdir(parents=True, exist_ok=True)
         self.output_dir.mkdir(parents=True, exist_ok=True)
 
+    def calculate_name(self, name: str):
+        res = re.match(
+            r"(?P<scanner>[^_]+)_(?P<month>\d{2})(?P<day>\d{2})(?P<year>\d{4})_"
+            r"(?P<time>\d+)_(?P<counter>\d+)\.pdf",
+            str(name),
+        )
+        if res:
+            name = (
+                f"{res.group('scanner')}_"
+                f"{res.group('year')}{res.group('month')}{res.group('day')}_"
+                f"{res.group('time')}_{res.group('counter')}.pdf"
+            )
+        return name
+
     def process_pdf(self, pdf_file: Union[Path, str]):
         """Process a single PDF."""
         pdf_file = Path(pdf_file)
         # move to the processing directory
-        pdf_file = pdf_file.rename(self.process_dir / pdf_file.name)
+        pdf_file = pdf_file.rename(
+            self.process_dir / self.calculate_name(pdf_file.name)
+        )
         res = subprocess.run(
             ["ocrmypdf", *self.ocrmypdf_opts, pdf_file, self.output_dir / pdf_file.name]
         )