X-Git-Url: https://git.donarmstrong.com/?p=deb_pkgs%2Fscowl.git;a=blobdiff_plain;f=current%2Fsrc%2Fmake-variant.lst;h=8bc4f414afd252f76ef7c11693bb20adcea8edfb;hp=aeefd029cf869372f4193b24b2c65292dab706dc;hb=7b14ba883fb1046508c44be37b4c6ba5da5feacf;hpb=b72d489091bf51f14f63db9bec15fe71fa52a395 diff --git a/current/src/make-variant.lst b/current/src/make-variant.lst index aeefd02..8bc4f41 100755 --- a/current/src/make-variant.lst +++ b/current/src/make-variant.lst @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash start () { @@ -15,50 +15,174 @@ rem () comm -23 - $1; } +# +# perform additional proc. on varcon +# + +rm -f everything.lst +cat r/varcon/*.lst \ + | src/filter | sort -u \ + > r/varcon/everything.lst + +cat r/varcon/american.lst r/varcon/american-v?.lst \ + | src/filter | sort -u \ + > r/varcon/all-amer.lst + +cat r/varcon/american.lst r/varcon/other.lst \ + | src/filter | sort -u \ + > r/varcon/nonvariant-amer.lst + +cat r/varcon/american-v?.lst r/varcon/other-v?.lst \ + | src/filter | sort -u \ + > r/varcon/variant-amer.lst + +cat r/varcon/british-v?.lst r/varcon/british_z-v?.lst \ + | src/filter | sort -u \ + > r/varcon/variant-brit.lst + +cat r/varcon/canadian-v?.lst \ + | src/filter | sort -u \ + > r/varcon/variant-can.lst + +cat r/varcon/british-v?.lst r/varcon/british_z-v?.lst r/varcon/canadian-v?.lst \ + | src/filter | sort -u \ + > r/varcon/variant-nonamer.lst + +for sp in american british british_z canadian other; do + for vl in 0 1 2; do + touch r/varcon/$sp-v$vl.lst + done +done + +comm -12 r/varcon/british.lst r/varcon/british_z.lst \ + > r/varcon/british-common.lst + +# +# We let some british variants into the variant lists.. +# see below +# + +for l in 0 1 2 3; do + + cat r/varcon/british-v$l.lst r/varcon/british_z-v$l.lst r/varcon/canadian-v$l.lst \ + | src/filter | sort -u \ + > r/varcon/variant-nonamer-v$l.lst + + comm -12 r/varcon/nonvariant-amer.lst r/varcon/variant-nonamer-v$l.lst \ + > r/varcon/variant-nonamer-v$l-filtered.lst + +done + +# +# perform additional proc. on 12dicts +# +# for each of the lists remove any variants that are known about in +# varcon +# + cat r/alt12dicts/not-variant.lst \ | src/filter | src/add-affixes 0.0 use-all | sort -u \ > working/variant-no.lst +expand() { + src/filter \ + | src/add-affixes $1 use-all | sort -u \ + | rem working/variant-no.lst +} + cat r/alt12dicts/variant-maybe.lst \ - | src/filter | src/add-affixes 0.9 use-all | sort -u \ + | expand 0.9 \ + | rem r/varcon/w_variant-amer.lst \ + | rem r/varcon/nonvariant-amer.lst \ > working/variant-maybe.lst cat r/alt12dicts/variant-yes.lst \ - | src/filter | sort -u \ - > working/variant-yes-bare.lst - -cat r/alt12dicts/variant-yes.lst \ - | src/filter | src/add-affixes 1.9 use-all | sort -u \ + | expand 1.9 \ + | rem r/varcon/w_variant-amer.lst \ + | rem r/varcon/nonvariant-amer.lst \ > working/variant-yes.lst +cat r/alt12dicts/nonamer-maybe.lst \ + | expand 1.9 \ + | rem r/varcon/everything.lst \ + > working/variant-nonamer-maybe.lst + +cat r/alt12dicts/nonamer-yes.lst \ + | expand 1.9 \ + | rem r/varcon/everything.lst \ + > working/variant-nonamer-yes.lst + +# +# Special rule for british and canadian list +# + +cat r/varcon/british.lst r/varcon/british-v0.lst\ + | sort -u > r/varcon/british+.lst + +cat r/varcon/canadian.lst r/varcon/canadian-v0.lst\ + | sort -u > r/varcon/canadian+.lst + +# +# Create the initial variant lists +# + start \ | add working/variant-maybe.lst \ - | rem working/variant-no.lst \ + | add r/varcon/american-v0.lst \ + | add r/varcon/american-v1.lst \ + | add r/varcon/other-v0.lst \ + | add r/varcon/other-v1.lst \ + | add r/varcon/variant-nonamer-v0-filtered.lst \ + | add r/varcon/variant-nonamer-v1-filtered.lst \ | add r/special/variant_0 \ | rem r/special/never-variant \ > working/variant_0-pre.lst start \ | add working/variant-yes.lst \ - | rem working/variant-no.lst \ - | add working/variant-yes-bare.lst \ + | add working/variant-nonamer-maybe.lst \ + | add r/varcon/american-v2.lst \ + | add r/varcon/other-v2.lst \ + | add r/varcon/variant-nonamer-v2-filtered.lst \ | add r/special/variant_1 \ | rem working/variant_0-pre.lst \ | rem r/special/never-variant \ > working/variant_1-pre.lst start \ + | add working/variant-nonamer-yes.lst \ + | add r/varcon/american-v3.lst \ + | add r/varcon/other-v3.lst \ | add r/special/variant_2 \ + | add r/varcon/variant-nonamer-v0.lst \ + | add r/varcon/variant-nonamer-v1.lst \ + | add r/varcon/variant-nonamer-v2.lst \ + | add r/varcon/variant-nonamer-v3.lst \ | rem working/variant_1-pre.lst \ | rem working/variant_0-pre.lst \ | rem r/special/never-variant \ > working/variant_2-pre.lst +# +# Generate special never-variant list +# + +start \ + | add working/variant_0-pre.lst \ + | egrep "wards?$" \ + | add r/special/never-variant \ + > working/never-variant.lst + +# +# Now take into account variant info from AGID +# and also special never-veriant list +# + start \ | add r/infl/variant_0.lst \ | rem working/variant_2-pre.lst | rem working/variant_1-pre.lst \ | add working/variant_0-pre.lst \ - | rem r/special/never-variant \ + | rem working/never-variant.lst \ > working/variant_0.lst start \ @@ -67,7 +191,7 @@ start \ | add working/variant_1-pre.lst \ | add r/infl/variant_0.lst \ | rem working/variant_0.lst \ - | rem r/special/never-variant \ + | rem working/never-variant.lst \ > working/variant_1.lst start \ @@ -75,11 +199,66 @@ start \ | add working/variant_2-pre.lst \ | add r/infl/variant_1.lst | add r/infl/variant_0.lst \ | rem working/variant_1.lst | rem working/variant_0.lst \ - | rem r/special/never-variant \ + | rem working/never-variant.lst \ > working/variant_2.lst +# +# Make brit. and can. variant lists +# + +start \ + | add working/variant_0.lst\ + | rem r/varcon/all-amer.lst\ + | add r/varcon/british-v0.lst\ + | add r/varcon/british-v1.lst\ + | add r/varcon/british_z-v0.lst\ + | add r/varcon/british_z-v1.lst\ + | rem r/varcon/british-common.lst\ + > working/british_variant_0.lst + +start \ + | add working/variant_1.lst\ + | rem r/varcon/all-amer.lst\ + | add r/varcon/british-v2.lst\ + | add r/varcon/british_z-v2.lst\ + | rem working/british_variant_0.lst\ + | rem r/varcon/british-common.lst\ + > working/british_variant_1.lst + +start \ + | add working/variant_0.lst\ + | rem r/varcon/all-amer.lst\ + | add r/varcon/canadian-v0.lst\ + | add r/varcon/canadian-v1.lst\ + | rem r/varcon/canadian.lst\ + > working/canadian_variant_0.lst + +start \ + | add working/variant_1.lst\ + | rem r/varcon/all-amer.lst\ + | add r/varcon/canadian-v2.lst\ + | rem working/canadian_variant_0.lst\ + | rem r/varcon/canadian.lst\ + > working/canadian_variant_1.lst + +# +# Now fix up american/british/canadian lists to make sure we don't +# leave anything out +# + +cat working/variant_{0,1,2}.lst \ + working/{british,canadian}_variant_{0,1}.lst \ + r/varcon/{american,british+,british_z,canadian+}.lst \ + | sort -u > working/variant-somewhere.lst +comm -12 working/never-variant.lst working/variant-somewhere.lst \ + | add r/varcon/american.lst \ + > working/american.lst +# nothing to do for other lists, yet +cp r/varcon/british+.lst working/british.lst +cp r/varcon/british_z.lst working/british_z.lst +cp r/varcon/canadian+.lst working/canadian.lst