Imported Upstream version 2015.08.24

[deb_pkgs/scowl.git] / speller / make-hunspell-dict
diff --git a/speller/make-hunspell-dict b/speller/make-hunspell-dict

new file mode 100755 (executable)

index 0000000..d23f28e
--- /dev/null
+++ b/speller/make-hunspell-dict
@@ -0,0 +1,108 @@
+#!/usr/bin/env bash
+
+ASPELL=aspell
+HUNSPELL=hunspell
+: ${SCOWL:=..}
+SPELLER="$SCOWL/speller"
+: ${UNIX2DOS:=unix2dos}
+
+set -e
+
+export LANG=C
+export LC_ALL=C
+export LC_CTYPE=C
+export LC_COLLATE=C
+
+SIZE=60
+
+mk-list() { "$SCOWL/mk-list" -d "$SCOWL/final" "$@"; }  # run $SCOWL/mk-list on $SCOWL/final; quoted so SCOWL may contain spaces
+
+prep() {
+  echo prep 
+
+  cat $SCOWL/misc/{offensive.1,offensive.2,profane.1} | sort -u > nosug
+}
+
+doit() {
+  echo creating $1.dic
+
+  eval $2 | sort -u > $1.0
+
+  comm -12 $1.0 nosug > $1-nosug.1
+
+  comm -23 $1.0 nosug > $1.1
+
+  $SPELLER/munch-list munch $SPELLER/en.aff < $1-nosug.1 | $SPELLER/add-no-suggest > $1.2
+
+  $SPELLER/munch-list munch $SPELLER/en.aff < $1.1 >> $1.2
+
+  cat $SPELLER/en.dic.supp >> $1.2
+
+  wc -l < $1.2 | tr -d '[:blank:]' > $1.dic
+  cat $1.2 | sort | iconv -f iso-8859-1 -t utf-8 >> $1.dic
+
+  cp $SPELLER/en.aff $1.aff
+  
+  if [ "$SCOWL_VERSION" ]; then
+    fn="$1-$SCOWL_VERSION"
+  else
+    fn="$1"
+  fi
+
+  WHAT="$1 Hunspell Dictionary" sh $SPELLER/README_en.txt.sh > README_$1.txt
+  if [ -z "$3" ]; then
+    echo "Wordlist Command: $2" >> README_$1.txt
+  else
+    cat $3 >> README_$1.txt
+  fi
+
+  rm -f hunspell-$fn.zip
+  zip -9 hunspell-$fn.zip README_$1.txt $1.dic $1.aff
+
+  if [ -z "$3" ]; then
+    mkdir -p hunspell
+    cp hunspell-$fn.zip hunspell/
+
+    #echo check
+
+    cat $1-nosug.1 $1.1 | sort -u > $1.tocheck
+
+    #hunspell -l -d ./$1 < $1.dic.tocheck > misspelled
+
+    cat $1.tocheck | iconv -f iso-8859-1 -t utf-8 | $UNIX2DOS > $1.txt
+    cat <<EOF > README.txt
+This zip file contains the words found in the corresponding Hunspell
+dictionary.  See the file README_$1.txt.
+EOF
+    zip -9 hunspell/wordlist-$fn.zip README.txt README_$1.txt $1.txt
+  fi
+}
+
+prep
+
+if [ "$1" = "-all" ]
+then 
+
+  doit en_US "mk-list --accents=strip en_US $SIZE"
+  doit en_CA "mk-list --accents=strip en_CA $SIZE"
+  doit en_GB-ize "mk-list --accents=strip en_GB-ize $SIZE"
+  doit en_GB-ise "mk-list --accents=strip en_GB-ise $SIZE"
+
+  doit en_US-large "mk-list -v1 --accents=both en_US 70"
+  doit en_CA-large "mk-list -v1 --accents=both en_CA 70"
+  doit en_GB-large "mk-list -v1 --accents=both en_GB-ize en_GB-ise 70"
+
+  sh $SPELLER/README_en.txt.sh > hunspell/README
+
+elif [ "$1" = "-one" -a -n "$2" -a -n "$3" ]
+then
+
+  doit $2 "cat" $3
+
+else 
+
+  echo "usage: $0 -all | -one <dict-name> <parms file>"
+
+fi
+
+#rm eng*.dat nosug en_US*.? en_CA*.?