X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=make_djvu;h=17262008693ae6e490340186cbf240e960a96ab6;hb=57e5edd190fa2c3fff39ecc93660eb8e582cdb33;hp=2f153d7d1c45fd045e9a0a2ab31dd4439dfe6e44;hpb=116b2ffd4a361f45089460dc4ccda8acfbbdf40e;p=bin.git diff --git a/make_djvu b/make_djvu index 2f153d7..1726200 100755 --- a/make_djvu +++ b/make_djvu @@ -79,13 +79,29 @@ if (not @ARGV) { pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS; +sub is_bitonal { + my $file = shift; + open(my $fh,'-|','identify',$file) or die "Unable to execute identify $file: $!"; + local $/; + my $res = <$fh>; + if ($res =~ /Bilevel/) { + return 1; + } + return 0; +} + my @djvu_files; for my $tiff (@ARGV) { my $base_name = $tiff; $base_name =~ s/\.[^\.]+$//; - system('cjb2',$base_name,$base_name.'.djvu'); + if (is_bitonal($tiff)) { + system('cjb2',$tiff,$base_name.'.djvu'); + } else { + system('convert',$tiff,$base_name.'.ppm'); + system('c44','-dpi',600,$base_name.'.ppm',$base_name.'.djvu'); + } system('tesseract',$tiff,$base_name.'.ocr'); - my $fh = IO::File->new($base_name.'.ocr','r'); + my $fh = IO::File->new($base_name.'.ocr.txt','r'); my $wf = IO::File->new($base_name.'.txt','w'); print {$wf} "(page 0 0 1 1\n"; if (defined $fh) { @@ -98,9 +114,9 @@ for my $tiff (@ARGV) { } print {$wf} ")\n"; close $wf; - unlink($base_name.'.ocr') if -e $base_name.'.ocr'; - system('dvjused',$base_name.'.djvu','-e','select 1; remove-txt','-s'); - system('dvjused',$base_name.'.djvu','-e','select 1; set-txt '.$base_name.'.txt','-s'); + unlink($base_name.'.ocr.txt') if -e $base_name.'.ocr.txt'; + system('djvused',$base_name.'.djvu','-e','select 1; remove-txt','-s'); + system('djvused',$base_name.'.djvu','-e','select 1; set-txt '.$base_name.'.txt','-s'); unlink($base_name.'.txt') if -e $base_name.'.txt'; push @djvu_files,$base_name.'.djvu'; }