from filelock import Timeout, FileLock
import subprocess
from logging import error, info, debug, warning
+import re
class ScannerWorkflowEvent(FileSystemEventHandler):
self.process_dir.mkdir(parents=True, exist_ok=True)
self.output_dir.mkdir(parents=True, exist_ok=True)
def calculate_name(self, name: str) -> str:
    """Return *name* with its embedded date reordered to year-month-day.

    A name of the form ``<scanner>_<MMDDYYYY>_<time>_<counter>.pdf`` is
    rewritten as ``<scanner>_<YYYYMMDD>_<time>_<counter>.pdf``. Any name
    that does not match that pattern in its entirety is returned unchanged.

    Args:
        name: File name (without directory) to normalise. It is passed
            through ``str`` before matching.

    Returns:
        The rewritten file name when the pattern matches, otherwise
        ``name`` exactly as it was passed in.
    """
    # fullmatch rather than match: match only anchors the start, so a name
    # such as "x_01022020_1_1.pdf.part" would match and lose its trailing
    # ".part" when the new name is rebuilt from the captured groups.
    res = re.fullmatch(
        r"(?P<scanner>[^_]+)_(?P<month>\d{2})(?P<day>\d{2})(?P<year>\d{4})_"
        r"(?P<time>\d+)_(?P<counter>\d+)\.pdf",
        str(name),
    )
    if res is None:
        return name
    return (
        f"{res.group('scanner')}_"
        f"{res.group('year')}{res.group('month')}{res.group('day')}_"
        f"{res.group('time')}_{res.group('counter')}.pdf"
    )
+
def process_pdf(self, pdf_file: Union[Path, str]):
"""Process a single PDF."""
pdf_file = Path(pdf_file)
# move to the processing directory
- pdf_file = pdf_file.rename(self.process_dir / pdf_file.name)
+ pdf_file = pdf_file.rename(
+ self.process_dir / self.calculate_name(pdf_file.name)
+ )
res = subprocess.run(
["ocrmypdf", *self.ocrmypdf_opts, pdf_file, self.output_dir / pdf_file.name]
)