#!/usr/bin/env python3
# Copyright 2018 by Don Armstrong
# Licensed under the terms of the GPL version 3 or any later version at your option.
"""Make a single PDF with OCR from one or more TIFF files.

The input TIFF files are first merged into one multi-image TIFF with
``e2mtiff``; that combined file is then passed to ``tesseract`` with the
``pdf`` output configuration.
"""

import argparse
import os
import re
import subprocess
import tempfile


def build_parser():
    """Return the command-line parser for this script.

    Options:
        --output / -f: name of the PDF to write (default ``output.pdf``).
        tiff: one or more input TIFF files, in page order.
    """
    parser = argparse.ArgumentParser(
        description="Make a PDF from TIFF files with OCR")
    parser.add_argument('--output', '-f', default='output.pdf')
    parser.add_argument('tiff', nargs='+')
    return parser


def tesseract_output_base(output):
    """Return *output* with one trailing ``.pdf`` removed.

    tesseract appends ``.pdf`` to the output name it is given, so the
    suffix has to be stripped beforehand to avoid ``name.pdf.pdf``.  Names
    that do not end in ``.pdf`` are returned unchanged.
    """
    return re.sub(r"\.pdf$", "", output)


def main(argv=None):
    """Combine the TIFF inputs and OCR them into a PDF.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.

    Raises:
        subprocess.CalledProcessError: if ``e2mtiff`` or ``tesseract``
            exits with a non-zero status.
    """
    args = build_parser().parse_args(argv)
    output_name = tesseract_output_base(args.output)

    # Keep the intermediate file in a private temporary directory rather
    # than a fixed ``temp.tiff`` in the current directory: that would
    # overwrite an unrelated file of the same name, be left behind after
    # the run, and collide with concurrent runs in the same directory.
    with tempfile.TemporaryDirectory(prefix='tiff2pdf-') as workdir:
        combined = os.path.join(workdir, 'combined.tiff')

        # combine the tiff files into a multi-image tiff
        subprocess.run(['e2mtiff', '--output', combined] + args.tiff,
                       check=True)

        # OCR the combined image; tesseract writes output_name + ".pdf"
        subprocess.run(['tesseract', combined, output_name, 'pdf'],
                       check=True)


if __name__ == "__main__":
    main()