#!/usr/bin/env python3
# Copyright 2018 by Don Armstrong
# Licensed under the terms of the GPL version 3 or any later version at your option.
#
# Provenance (recovered from the git patch this script was extracted from):
#   From: Don Armstrong
#   Date: Thu, 7 Jun 2018 20:33:08 +0000 (-0700)
#   Subject: add make_ocr_pdf command
#   X-Git-Url: https://git.donarmstrong.com/?p=bin.git;a=commitdiff_plain;h=82676f2cd7110bb71df155b534d6cede0e0a9876
#   New file: make_ocr_pdf (mode 100755, index 0000000..753d69f)
"""Make a PDF from TIFF files with OCR.

The TIFF files given on the command line are first combined into a single
multi-image TIFF with ``e2mtiff``; that combined file is then handed to
``tesseract`` with the ``pdf`` output configuration.
"""

import argparse
import os
import re
import subprocess
import tempfile


def build_parser():
    """Return the command-line parser for this script.

    Options:
        --output / -f: name of the PDF to produce (default ``output.pdf``).
        tiff: one or more input TIFF files.
    """
    parser = argparse.ArgumentParser(
        description="Make a PDF from TIFF files with OCR")
    parser.add_argument('--output', '-f', default='output.pdf')
    parser.add_argument('tiff', nargs='+')
    return parser


def tesseract_output_base(output):
    """Return *output* with a trailing ``.pdf`` removed.

    tesseract appends ``.pdf`` itself to the output name it is given, so the
    suffix has to be stripped to avoid ending up with ``name.pdf.pdf``.  A
    name that does not end in ``.pdf`` is returned unchanged.
    """
    return re.sub(r"\.pdf$", "", output)


def main(argv=None):
    """Combine the input TIFFs and run OCR on the result.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.

    Raises:
        subprocess.CalledProcessError: if ``e2mtiff`` or ``tesseract`` exits
            with a non-zero status (both are run with ``check=True``).
    """
    args = build_parser().parse_args(argv)
    # Keep the intermediate multi-image TIFF in a private scratch directory
    # instead of ./temp.tiff: nothing in the working directory is overwritten,
    # concurrent runs cannot collide, and the file is removed afterwards even
    # when one of the external commands fails.
    with tempfile.TemporaryDirectory() as workdir:
        combined = os.path.join(workdir, 'temp.tiff')
        # combine the tiff files into a multi-image tiff
        subprocess.run(['e2mtiff', '--output', combined] + args.tiff,
                       check=True)
        output_name = tesseract_output_base(args.output)
        subprocess.run(['tesseract', combined, output_name, 'pdf'],
                       check=True)


if __name__ == "__main__":
    main()