X-Git-Url: https://git.donarmstrong.com/?p=deb_pkgs%2Fscowl.git;a=blobdiff_plain;f=mk-list;h=453b31352b6be793aceceea5912ba8b6a3137107;hp=77750af4152cfe79c546f2d0df3ea51bfe178410;hb=ee6e3b27f3b311265ea5231be71455767b829693;hpb=adbd0c3d3296ead6c49b4f13012aca4a558efc7f diff --git a/mk-list b/mk-list index 77750af..453b313 100755 --- a/mk-list +++ b/mk-list @@ -12,6 +12,7 @@ my $file_names_only; my $dir; my $no_implied; my $variants_str; +my $accents; sub usage() { print STDERR "$0 [-v#] [] \n"; @@ -22,23 +23,27 @@ GetOptions ("dir|d=s", \$dir, "with-variants|v=i", => \$with_variants, "variants=s", \$variants_str, "file-names-only|f", \$file_names_only, - "no-implied", \$no_implied) or usage(); + "no-implied", \$no_implied, + "accents=s", \$accents) or usage(); $dir = "final" unless defined $dir; +die "Cannot specify both --file-names-only and --accents" if $file_names_only && $accents; + my %spelling_map = qw(en-us american en-gb british? en-gb-ise british - en-gb-ize british_z en-gb-oed british_z en-ca canadian); + en-gb-ize british_z en-gb-oed british_z en-ca canadian en-au australian); $spelling_map{$_} = $_ foreach (qw(english special american british canadian - variant_0 variant_1 variant_2 - british_variant_0 british_variant_1 - canadian_variant_0 canadian_variant_1)); + variant_1 variant_2 variant_3 + british_variant_1 british_variant_2 + canadian_variant_1 canadian_variant_2 + australian_variant_1 australian_variant_2)); die "Cannot specify both --with-variants and --variants" if $with_variants && $variants_str; $variants_str = "non" unless defined $variants_str || defined $with_variants; -$variants_str = join(',', 'non', 0..$with_variants) if defined $with_variants; +$variants_str = join(',', 'non', 1..$with_variants) if defined $with_variants; my %use_variant_level; foreach (split ',', $variants_str) { - die "Invalid variant num: $_" unless /^(non|[012])$/; + die "Invalid variant num: $_" unless /^(non|[123])$/; $use_variant_level{$_} = 1; } @@ -63,13 +68,15 @@ $SPS{$_} = 1 foreach (@SPS); $SPS{english} = 1 unless $no_implied; $SPS{special} = 1 unless $no_implied; -$SPS{variant_0} = 1 if $SPS{american} && $use_variant_level{0}; -$SPS{variant_1} = 1 if $SPS{american} && $use_variant_level{1}; -$SPS{british_variant_0} = 1 if ($SPS{british} || $SPS{british_z}) && $use_variant_level{0}; +$SPS{variant_1} = 1 if $SPS{american} && $use_variant_level{1}; +$SPS{variant_2} = 1 if $SPS{american} && $use_variant_level{2}; $SPS{british_variant_1} = 1 if ($SPS{british} || $SPS{british_z}) && $use_variant_level{1}; -$SPS{canadian_variant_0} = 1 if $SPS{canadian} && $use_variant_level{0}; +$SPS{british_variant_2} = 1 if ($SPS{british} || $SPS{british_z}) && $use_variant_level{2}; $SPS{canadian_variant_1} = 1 if $SPS{canadian} && $use_variant_level{1}; -$SPS{variant_2} = 1 if $use_variant_level{2}; +$SPS{canadian_variant_2} = 1 if $SPS{canadian} && $use_variant_level{2}; +$SPS{australian_variant_1} = 1 if $SPS{australian} && $use_variant_level{1}; +$SPS{australian_variant_2} = 1 if $SPS{australian} && $use_variant_level{2}; +$SPS{variant_3} = 1 if $use_variant_level{3}; unless ($use_variant_level{non}) { delete $SPS{$_} foreach (@SPS); @@ -77,10 +84,30 @@ unless ($use_variant_level{non}) { opendir D, $dir or die "Unable to open dir $dir\n"; -my @files = readdir(D); - my @words; +sub deaccent($) { + local $_ = $_[0]; + # from deaccent-toperl.cc + # the line is encoded in iso-8859-1 to match the input encoding: + tr/ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿ/AAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy/; + return $_; +} +my $add_word; +if (!defined($accents) || $accents eq 'keep') { + $add_word = sub {push @words, $_[0]}; +} elsif ($accents eq 'strip') { + $add_word = sub {push @words, deaccent($_[0])}; +} elsif ($accents eq 'both') { + $add_word = sub {push @words, $_[0]; + my $new_word = deaccent($_[0]); + push @words, $new_word if $new_word ne $_[0];} +} else { + die "--accents must be one of: keep, strip, both\n"; +} + +my @files = readdir(D); + foreach (sort @files) { my ($sp, $type, $size) = /^(\w+)-([^.]+)\.(\d\d)/ or next; next unless $SPS{$sp}; @@ -91,7 +118,7 @@ foreach (sort @files) { open F, "$dir/$_" or die "Unable to open $dir/$_ for reading"; while () { chop; - push @words, $_; + $add_word->($_); } } } @@ -103,8 +130,3 @@ foreach (sort @words) { $prev = $_; } - - - - -