git.donarmstrong.com Git - scanner_workflow.git/commitdiff
rename files to follow iso8601
author Don Armstrong <don@donarmstrong.com>
Tue, 24 Jan 2023 06:10:55 +0000 (22:10 -0800)
committer Don Armstrong <don@donarmstrong.com>
Tue, 24 Jan 2023 06:10:55 +0000 (22:10 -0800)
scanner_workflow.py

index a65decc22b204f295f5e20b530fa066a99e05b9f..96346be86c859bdd4f51997d751dc9b3995fa940 100755 (executable)
@@ -8,6 +8,7 @@ from typing import Union
 from filelock import Timeout, FileLock
 import subprocess
 from logging import error, info, debug, warning
 from filelock import Timeout, FileLock
 import subprocess
 from logging import error, info, debug, warning
+import re
 
 
 class ScannerWorkflowEvent(FileSystemEventHandler):
 
 
 class ScannerWorkflowEvent(FileSystemEventHandler):
@@ -67,11 +68,27 @@ class ScannerWorkflow:
         self.process_dir.mkdir(parents=True, exist_ok=True)
         self.output_dir.mkdir(parents=True, exist_ok=True)
 
         self.process_dir.mkdir(parents=True, exist_ok=True)
         self.output_dir.mkdir(parents=True, exist_ok=True)
 
+    def calculate_name(self, name: str):
+        """Return *name* with its date stamp reordered to ISO 8601 order.
+
+        Names of the form ``<scanner>_MMDDYYYY_<time>_<counter>.pdf`` are
+        rewritten as ``<scanner>_YYYYMMDD_<time>_<counter>.pdf``.  Any other
+        name is returned unchanged.
+
+        Args:
+            name: Bare file name (no directory part) to normalise.
+
+        Returns:
+            The reordered name when the whole of ``name`` matches the
+            pattern, otherwise ``name`` exactly as it was passed in.
+        """
+        # fullmatch rather than match: the new name is rebuilt from the
+        # captured groups, so a prefix-only match would silently discard
+        # anything following ".pdf" (e.g. a ".part" suffix).
+        res = re.fullmatch(
+            r"(?P<scanner>[^_]+)_(?P<month>\d{2})(?P<day>\d{2})(?P<year>\d{4})_"
+            r"(?P<time>\d+)_(?P<counter>\d+)\.pdf",
+            str(name),
+        )
+        if res:
+            name = (
+                f"{res.group('scanner')}_"
+                f"{res.group('year')}{res.group('month')}{res.group('day')}_"
+                f"{res.group('time')}_{res.group('counter')}.pdf"
+            )
+        return name
+
     def process_pdf(self, pdf_file: Union[Path, str]):
         """Process a single PDF."""
         pdf_file = Path(pdf_file)
         # move to the processing directory
     def process_pdf(self, pdf_file: Union[Path, str]):
         """Process a single PDF."""
         pdf_file = Path(pdf_file)
         # move to the processing directory
-        pdf_file = pdf_file.rename(self.process_dir / pdf_file.name)
+        pdf_file = pdf_file.rename(
+            self.process_dir / self.calculate_name(pdf_file.name)
+        )
         res = subprocess.run(
             ["ocrmypdf", *self.ocrmypdf_opts, pdf_file, self.output_dir / pdf_file.name]
         )
         res = subprocess.run(
             ["ocrmypdf", *self.ocrmypdf_opts, pdf_file, self.output_dir / pdf_file.name]
         )