Write OCR output into a dated subdirectory (YYYY/MM_DD) of the output directory instead of directly into the output directory

author Don Armstrong <don@donarmstrong.com>

Fri, 24 Feb 2023 00:27:19 +0000 (16:27 -0800)

committer Don Armstrong <don@donarmstrong.com>

Fri, 24 Feb 2023 00:27:19 +0000 (16:27 -0800)
author Don Armstrong <don@donarmstrong.com>
Fri, 24 Feb 2023 00:27:19 +0000 (16:27 -0800)
committer Don Armstrong <don@donarmstrong.com>
Fri, 24 Feb 2023 00:27:19 +0000 (16:27 -0800)
diff --git a/scanner_workflow.py b/scanner_workflow.py

index be2b9db2ba7170a00896fcc6751b6d32d9cec8c1..e803f9ee1fa4da4e6f5df62922e0328df6d72011 100755 (executable)
--- a/scanner_workflow.py
+++ b/scanner_workflow.py
@@ -84,21 +84,33 @@ class ScannerWorkflow:
              )
          return name
  
+    def pdf_file_path(self, name: str):  # map a scan file name to a dated relative directory
+        res = re.match(  # re.match anchors at the start only; the pattern has no end anchor
+            r"(?P<scanner>[^_]+)_(?P<month>\d{2})(?P<day>\d{2})(?P<year>\d{4})_"
+            r"(?P<time>\d+)_(?P<counter>\d+)\.pdf",
+            str(name),
+        )
+        if res:
+            return f"{res.group('year')}/{res.group('month')}_{res.group('day')}"  # "<year>/<month>_<day>"
+        return ""  # name does not match the pattern: empty relative path
+
      def process_pdf(self, pdf_file: Union[Path, str]):
          """Process a single PDF."""
          pdf_file = Path(pdf_file)
          orig_pdf = pdf_file
          # move to the processing directory
+        output_path = self.pdf_file_path(pdf_file.name)
          pdf_file = pdf_file.rename(
              self.process_dir / self.calculate_name(pdf_file.name)
          )
-        output_file = self.output_dir / pdf_file.name
+        (self.output_dir / output_path).mkdir(parents=True, exist_ok=True)
+        output_file = self.output_dir / output_path / pdf_file.name
          res = subprocess.run(["ocrmypdf", *self.ocrmypdf_opts, pdf_file, output_file])
          if res.returncode != 0:
              error(f"Unable to properly OCR pdf: {res.stdout} {res.stderr}")
              return
          pdf_file.unlink()
-        info("Processed {orig_pdf} into {output_file}")
+        info(f"Processed {orig_pdf} into {output_file}")
  
      def event_loop(self):
          """Main event loop; called from the command line."""
author	Don Armstrong <don@donarmstrong.com>
	Fri, 24 Feb 2023 00:27:19 +0000 (16:27 -0800)
committer	Don Armstrong <don@donarmstrong.com>
	Fri, 24 Feb 2023 00:27:19 +0000 (16:27 -0800)