From 82676f2cd7110bb71df155b534d6cede0e0a9876 Mon Sep 17 00:00:00 2001
From: Don Armstrong
Date: Thu, 7 Jun 2018 13:33:08 -0700
Subject: [PATCH] add make_ocr_pdf command

---
 make_ocr_pdf | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100755 make_ocr_pdf

diff --git a/make_ocr_pdf b/make_ocr_pdf
new file mode 100755
--- /dev/null
+++ b/make_ocr_pdf
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+# Copyright 2018 by Don Armstrong
+# Licensed under the terms of the GPL version 3 or any later version at your option.
+"""Combine TIFF images into a single PDF with OCR.
+
+The input images are first merged into one multi-image TIFF with
+``e2mtiff``; ``tesseract`` then turns that file into the PDF.
+"""
+
+import argparse
+import os
+import re
+import subprocess
+import tempfile
+
+
+def main(argv=None):
+    """Parse the command line, then run e2mtiff followed by tesseract.
+
+    Both commands run with ``check=True``, so a failing command raises
+    ``subprocess.CalledProcessError``.
+    """
+    parser = argparse.ArgumentParser(
+        description="Make a PDF from TIFF files with OCR")
+    parser.add_argument('--output', '-f', default='output.pdf')
+    parser.add_argument('tiff', nargs='+')
+    args = parser.parse_args(argv)
+
+    # tesseract appends ".pdf" to the output name itself, so strip a
+    # trailing ".pdf" here to avoid ending up with "name.pdf.pdf"
+    output_name = re.sub(r"\.pdf$", "", args.output)
+
+    # combine the tiff files into a multi-image tiff; it lives in a
+    # private scratch directory so an existing ./temp.tiff is never
+    # overwritten and the intermediate file is removed afterwards
+    with tempfile.TemporaryDirectory() as workdir:
+        combined = os.path.join(workdir, 'temp.tiff')
+        subprocess.run(['e2mtiff', '--output', combined] + args.tiff,
+                       check=True)
+        subprocess.run(['tesseract', combined, output_name, 'pdf'],
+                       check=True)
+
+
+if __name__ == '__main__':
+    main()
-- 
2.39.2