]> git.donarmstrong.com Git - deb_pkgs/scowl.git/blobdiff - src/make-variant.lst
Imported Upstream version 2015.08.24
[deb_pkgs/scowl.git] / src / make-variant.lst
diff --git a/src/make-variant.lst b/src/make-variant.lst
new file mode 100755 (executable)
index 0000000..1329b14
--- /dev/null
@@ -0,0 +1,264 @@
+#!/bin/bash
+
+start ()
+{
+    echo -n;
+}
+
+add ()
+{
+  cat - $1 | src/filter | sort -u;
+}
+
+rem ()
+{
+  comm -23 - $1;
+}
+
+#
+# perform additional proc. on varcon
+#
+
+rm -f everything.lst
+cat r/varcon/*.lst \
+  | src/filter | sort -u \
+  > r/varcon/everything.lst
+
+cat r/varcon/american.lst r/varcon/american-v?.lst \
+  | src/filter | sort -u \
+  > r/varcon/all-amer.lst
+
+cat r/varcon/american.lst r/varcon/other.lst \
+  | src/filter | sort -u \
+  > r/varcon/nonvariant-amer.lst
+
+cat r/varcon/american-v?.lst r/varcon/other-v?.lst \
+  | src/filter | sort -u \
+  > r/varcon/variant-amer.lst
+
+cat r/varcon/british-v?.lst r/varcon/british_z-v?.lst \
+  | src/filter | sort -u \
+  > r/varcon/variant-brit.lst
+
+cat r/varcon/canadian-v?.lst \
+  | src/filter | sort -u \
+  > r/varcon/variant-can.lst
+
+cat r/varcon/british-v?.lst r/varcon/british_z-v?.lst r/varcon/canadian-v?.lst \
+  | src/filter | sort -u \
+  > r/varcon/variant-nonamer.lst
+
+for sp in american british british_z canadian other; do
+  for vl in 0 1 2; do
+    touch r/varcon/$sp-v$vl.lst
+  done
+done
+
+comm -12 r/varcon/british.lst r/varcon/british_z.lst \
+  > r/varcon/british-common.lst
+
+#
+# We let some british variants into the variant lists..
+# see below
+#
+
+for l in 0 1 2 3; do
+
+  cat r/varcon/british-v$l.lst r/varcon/british_z-v$l.lst r/varcon/canadian-v$l.lst \
+    | src/filter | sort -u \
+    > r/varcon/variant-nonamer-v$l.lst
+
+  comm -12 r/varcon/nonvariant-amer.lst r/varcon/variant-nonamer-v$l.lst \
+    > r/varcon/variant-nonamer-v$l-filtered.lst
+
+done
+
+#
+# perform additional proc. on 12dicts
+#
+# for each of the lists remove any variants that are known about in
+# varcon
+#
+
+cat r/alt12dicts/not-variant.lst \
+  | src/filter | src/add-affixes 0.0 use-all | sort -u \
+  > working/variant-no.lst
+
+expand() {
+    src/filter \
+      | src/add-affixes $1 use-all | sort -u \
+      | rem working/variant-no.lst
+}
+
+cat r/alt12dicts/variant-maybe.lst \
+  | expand 0.9                     \
+  | rem r/varcon/w_variant-amer.lst  \
+  | rem r/varcon/nonvariant-amer.lst \
+  > working/variant-maybe.lst
+
+cat r/alt12dicts/variant-yes.lst \
+  | expand 1.9                   \
+  | rem r/varcon/w_variant-amer.lst  \
+  | rem r/varcon/nonvariant-amer.lst \
+  > working/variant-yes.lst
+
+cat r/alt12dicts/nonamer-maybe.lst    \
+  | expand 1.9                        \
+  | rem r/varcon/everything.lst       \
+  > working/variant-nonamer-maybe.lst 
+
+cat r/alt12dicts/nonamer-yes.lst      \
+  | expand 1.9                        \
+  | rem r/varcon/everything.lst       \
+  > working/variant-nonamer-yes.lst
+
+#
+# Special rule for british and canadian list
+#
+
+cat r/varcon/british.lst r/varcon/british-v0.lst\
+  | sort -u > r/varcon/british+.lst
+
+cat r/varcon/canadian.lst r/varcon/canadian-v0.lst\
+  | sort -u > r/varcon/canadian+.lst
+
+#
+# Create the initial variant lists
+#
+
+start \
+  | add working/variant-maybe.lst    \
+  | add r/varcon/american-v0.lst     \
+  | add r/varcon/american-v1.lst     \
+  | add r/varcon/other-v0.lst        \
+  | add r/varcon/other-v1.lst        \
+  | add r/varcon/variant-nonamer-v0-filtered.lst \
+  | add r/varcon/variant-nonamer-v1-filtered.lst \
+  | add r/special/variant_1          \
+  | rem r/special/never-variant      \
+  > working/variant_1-pre.lst
+
+start \
+  | add working/variant-yes.lst      \
+  | add working/variant-nonamer-maybe.lst \
+  | add r/varcon/american-v2.lst     \
+  | add r/varcon/other-v2.lst        \
+  | add r/varcon/variant-nonamer-v2-filtered.lst \
+  | add r/special/variant_2          \
+  | rem working/variant_1-pre.lst    \
+  | rem r/special/never-variant      \
+  > working/variant_2-pre.lst
+
+start \
+  | add working/variant-nonamer-yes.lst \
+  | add r/varcon/american-v3.lst     \
+  | add r/varcon/other-v3.lst        \
+  | add r/special/variant_3          \
+  | add r/varcon/variant-nonamer-v0.lst \
+  | add r/varcon/variant-nonamer-v1.lst \
+  | add r/varcon/variant-nonamer-v2.lst \
+  | add r/varcon/variant-nonamer-v3.lst \
+  | rem working/variant_2-pre.lst    \
+  | rem working/variant_1-pre.lst    \
+  | rem r/special/never-variant      \
+  > working/variant_3-pre.lst
+
+#
+# Generate special never-variant list
+#
+
+start \
+  | add working/variant_1-pre.lst \
+  | egrep "wards?$"               \
+  | add r/special/never-variant   \
+  > working/never-variant.lst
+
+#
+# Now take into account variant info from AGID
+# and also special never-veriant list 
+#
+
+start \
+  | add r/infl/variant_0.lst         \
+  | rem working/variant_3-pre.lst | rem working/variant_2-pre.lst    \
+  | add working/variant_1-pre.lst    \
+  | rem working/never-variant.lst \
+  > working/variant_1.lst
+
+start \
+  | add r/infl/variant_1.lst         \
+  | rem working/variant_3-pre.lst    \
+  | add working/variant_2-pre.lst    \
+  | add r/infl/variant_0.lst         \
+  | rem working/variant_1.lst        \
+  | rem working/never-variant.lst \
+  > working/variant_2.lst
+
+start \
+  | add r/infl/variant_2.lst         \
+  | add working/variant_3-pre.lst    \
+  | add r/infl/variant_1.lst  | add r/infl/variant_0.lst         \
+  | rem working/variant_2.lst | rem working/variant_1.lst        \
+  | rem working/never-variant.lst \
+  > working/variant_3.lst
+
+#
+# Make brit. and can. variant lists
+# 
+
+start \
+  | add working/variant_1.lst\
+  | rem r/varcon/all-amer.lst\
+  | add r/varcon/british-v0.lst\
+  | add r/varcon/british-v1.lst\
+  | add r/varcon/british_z-v0.lst\
+  | add r/varcon/british_z-v1.lst\
+  | rem r/varcon/british-common.lst\
+  > working/british_variant_1.lst
+
+start \
+  | add working/variant_2.lst\
+  | rem r/varcon/all-amer.lst\
+  | add r/varcon/british-v2.lst\
+  | add r/varcon/british_z-v2.lst\
+  | rem working/british_variant_1.lst\
+  | rem r/varcon/british-common.lst\
+  > working/british_variant_2.lst
+
+start \
+  | add working/variant_1.lst\
+  | rem r/varcon/all-amer.lst\
+  | add r/varcon/canadian-v0.lst\
+  | add r/varcon/canadian-v1.lst\
+  | rem r/varcon/canadian.lst\
+  > working/canadian_variant_1.lst
+
+start \
+  | add working/variant_2.lst\
+  | rem r/varcon/all-amer.lst\
+  | add r/varcon/canadian-v2.lst\
+  | rem working/canadian_variant_1.lst\
+  | rem r/varcon/canadian.lst\
+  > working/canadian_variant_2.lst
+
+#
+# Now fix up american/british/canadian lists to make sure we don't
+# leave anything out
+# 
+
+cat working/variant_{1,2,3}.lst \
+    working/{british,canadian}_variant_{1,2}.lst \
+    r/varcon/{american,british+,british_z,canadian+}.lst \
+  | sort -u > working/variant-somewhere.lst
+
+comm -12 working/never-variant.lst working/variant-somewhere.lst \
+  | add r/varcon/american.lst \
+  > working/american.lst
+
+# nothing to do for other lists, yet
+
+cp r/varcon/british+.lst working/british.lst
+
+cp r/varcon/british_z.lst working/british_z.lst
+
+cp r/varcon/canadian+.lst working/canadian.lst