X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=speller%2Fmake-hunspell-dict;fp=speller%2Fmake-hunspell-dict;h=d23f28eb2c3c08fed273771ad13e56d07565c223;hb=b13ea8a082364672c6de2b010e558211ff52ec9a;hp=0000000000000000000000000000000000000000;hpb=01534a94130c1f5a3a230cf4fe18365a235ba271;p=deb_pkgs%2Fscowl.git diff --git a/speller/make-hunspell-dict b/speller/make-hunspell-dict new file mode 100755 index 0000000..d23f28e --- /dev/null +++ b/speller/make-hunspell-dict @@ -0,0 +1,108 @@ +#!/usr/bin/env bash + +ASPELL=aspell +HUNSPELL=hunspell +: ${SCOWL:=..} +SPELLER="$SCOWL/speller" +: ${UNIX2DOS:=unix2dos} + +set -e + +export LANG=C +export LC_ALL=C +export LC_CTYPE=C +export LC_COLLATE=C + +SIZE=60 + +mk-list() { $SCOWL/mk-list -d $SCOWL/final "$@"; } + +prep() { + echo prep + + cat $SCOWL/misc/{offensive.1,offensive.2,profane.1} | sort -u > nosug +} + +doit() { + echo creating $1.dic + + eval $2 | sort -u > $1.0 + + comm -12 $1.0 nosug > $1-nosug.1 + + comm -23 $1.0 nosug > $1.1 + + $SPELLER/munch-list munch $SPELLER/en.aff < $1-nosug.1 | $SPELLER/add-no-suggest > $1.2 + + $SPELLER/munch-list munch $SPELLER/en.aff < $1.1 >> $1.2 + + cat $SPELLER/en.dic.supp >> $1.2 + + wc -l < $1.2 | tr -d '[:blank:]' > $1.dic + cat $1.2 | sort | iconv -f iso-8859-1 -t utf-8 >> $1.dic + + cp $SPELLER/en.aff $1.aff + + if [ "$SCOWL_VERSION" ]; then + fn="$1-$SCOWL_VERSION" + else + fn="$1" + fi + + WHAT="$1 Hunspell Dictionary" sh $SPELLER/README_en.txt.sh > README_$1.txt + if [ -z "$3" ]; then + echo "Wordlist Command: $2" >> README_$1.txt + else + cat $3 >> README_$1.txt + fi + + rm -f hunspell-$fn.zip + zip -9 hunspell-$fn.zip README_$1.txt $1.dic $1.aff + + if [ -z "$3" ]; then + mkdir -p hunspell + cp hunspell-$fn.zip hunspell/ + + #echo check + + cat $1-nosug.1 $1.1 | sort -u > $1.tocheck + + #hunspell -l -d ./$1 < $1.dic.tocheck > misspelled + + cat $1.tocheck | iconv -f iso-8859-1 -t utf-8 | $UNIX2DOS > $1.txt + cat < README.txt +This zip file contains the words found in the corresponding Hunspell +dictionary. See the file README_$1.txt. +EOF + zip -9 hunspell/wordlist-$fn.zip README.txt README_$1.txt $1.txt + fi +} + +prep + +if [ "$1" = "-all" ] +then + + doit en_US "mk-list --accents=strip en_US $SIZE" + doit en_CA "mk-list --accents=strip en_CA $SIZE" + doit en_GB-ize "mk-list --accents=strip en_GB-ize $SIZE" + doit en_GB-ise "mk-list --accents=strip en_GB-ise $SIZE" + + doit en_US-large "mk-list -v1 --accents=both en_US 70" + doit en_CA-large "mk-list -v1 --accents=both en_CA 70" + doit en_GB-large "mk-list -v1 --accents=both en_GB-ize en_GB-ise 70" + + sh $SPELLER/README_en.txt.sh > hunspell/README + +elif [ "$1" = "-one" -a -n "$2" -a -n "$3" ] +then + + doit $2 "cat" $3 + +else + + echo "usage: $0 -all | -one " + +fi + +#rm eng*.dat nosug en_US*.? en_CA*.?