-#!/bin/bash
+#!/usr/bin/env bash
+# Build Hunspell dictionary zips (and matching word-list zips) from SCOWL.
+#
+# Usage: see the usage message at the bottom of this script.
+# Environment:
+#   SCOWL         - SCOWL directory (default: ..)
+#   UNIX2DOS      - line-ending converter command (default: unix2dos)
+#   SCOWL_VERSION - optional; when set it is appended to the zip file names
ASPELL=aspell
HUNSPELL=hunspell
+# Quoted so the expanded value is not word-split or globbed by the ':' no-op.
+: "${SCOWL:=..}"
+SPELLER="$SCOWL/speller"
+: "${UNIX2DOS:=unix2dos}"
set -e
export LC_CTYPE=C
export LC_COLLATE=C
-prep() {
- echo prep
+# Size argument passed to mk-list for the standard (non "-large") dictionaries.
+SIZE=60
- cat misc/{offensive.1,offensive.2,profane.1} | sort -u > nosug
+# Run SCOWL's mk-list against $SCOWL/final; all arguments are passed through.
+mk-list() { "$SCOWL/mk-list" -d "$SCOWL/final" "$@"; }
- cp en.aff eng_affix.dat
+# Write ./nosug: the sorted, de-duplicated union of the offensive/profane
+# lists under $SCOWL/misc. doit() compares each word list against this file.
+prep() {
+ echo prep
- cat << EOF > eng.dat
-name eng
-charset iso8859-1
-special ' -*-
-affix eng
-EOF
+ # $SCOWL is quoted; the brace list stays outside the quotes so it still expands.
+ cat "$SCOWL"/misc/{offensive.1,offensive.2,profane.1} | sort -u > nosug
}
doit() {
- echo creating en_$1.dic
+ # Build the Hunspell dictionary named $1 and zip it up.
+ #   $1 - dictionary name; base name of every generated file
+ #   $2 - shell command (run through eval) that writes the word list to stdout
+ #   $3 - optional file appended to the README in place of the
+ #        "Wordlist Command" line; when given, nothing is copied into
+ #        hunspell/ and no word-list zip is produced
+ # Reads ./nosug (written by prep) and the helper files under $SPELLER.
+ local fn
+ echo "creating $1.dic"
- cat en-common.wl en_$1-wo_accents-only.wl | sort -u > en_$1.0
+ # Quoted so the command string reaches eval without prior splitting/globbing.
+ eval "$2" | sort -u > "$1.0"
- comm en_$1.0 nosug -12 > en_$1-nosug.1
+ # Split the list: words also present in nosug vs. everything else.
+ comm -12 "$1.0" nosug > "$1-nosug.1"
- comm en_$1.0 nosug -23 > en_$1.1
+ comm -23 "$1.0" nosug > "$1.1"
- $ASPELL -l ./eng munch-list < en_$1-nosug.1 | grep -v '^\(XXX\|>>>\)' | ./add-no-suggest > en_$1.2
+ "$SPELLER/munch-list" munch "$SPELLER/en.aff" < "$1-nosug.1" | "$SPELLER/add-no-suggest" > "$1.2"
- $ASPELL -l ./eng munch-list < en_$1.1 | grep -v '^\(XXX\|>>>\)' >> en_$1.2
+ "$SPELLER/munch-list" munch "$SPELLER/en.aff" < "$1.1" >> "$1.2"
- cat en.dic.supp >> en_$1.2
+ cat "$SPELLER/en.dic.supp" >> "$1.2"
- wc -l en_$1.2 | cut -d' ' -f1 > en_$1.dic
- cat en_$1.2 | sort >> en_$1.dic
+ # The .dic file starts with the entry count; tr drops any blanks wc emits.
+ wc -l < "$1.2" | tr -d '[:blank:]' > "$1.dic"
+ sort "$1.2" | iconv -f iso-8859-1 -t utf-8 >> "$1.dic"
- cp en.aff en_$1.aff
+ cp "$SPELLER/en.aff" "$1.aff"
- cat README_en.txt.in ../Copyright > README_en_$1.txt
- echo >> README_en_$1.txt
- echo "Build Date: `date`" >> README_en_$1.txt
+ # Zip base name: "$1-$SCOWL_VERSION" when a version is set, plain "$1" otherwise.
+ fn="$1${SCOWL_VERSION:+-$SCOWL_VERSION}"
- zip -9 en_$1.zip README_en_$1.txt en_$1.dic en_$1.aff
+ WHAT="$1 Hunspell Dictionary" sh "$SPELLER/README_en.txt.sh" > "README_$1.txt"
+ if [ -z "$3" ]; then
+ echo "Wordlist Command: $2" >> "README_$1.txt"
+ else
+ cat "$3" >> "README_$1.txt"
+ fi
- cp -p en_$1.zip hunspell/
+ rm -f "hunspell-$fn.zip"
+ zip -9 "hunspell-$fn.zip" "README_$1.txt" "$1.dic" "$1.aff"
- #echo check
+ if [ -z "$3" ]; then
+ mkdir -p hunspell
+ cp "hunspell-$fn.zip" hunspell/
- cat en_$1-nosug.1 en_$1.1 | sort -u > en_$1.tocheck
+ #echo check
- #hunspell -l -d ./en_$1 < en_$1.dic.tocheck > misspelled
-}
+ cat "$1-nosug.1" "$1.1" | sort -u > "$1.tocheck"
+
+ #hunspell -l -d ./$1 < $1.dic.tocheck > misspelled
+ # $UNIX2DOS is left unquoted on purpose, in case the variable carries options.
+ iconv -f iso-8859-1 -t utf-8 < "$1.tocheck" | $UNIX2DOS > "$1.txt"
+ cat <<EOF > README.txt
+This zip file contains the words found in the corresponding Hunspell
+dictionary. See the file README_$1.txt.
+EOF
+ # Start from a fresh archive, as is done for the dictionary zip above.
+ rm -f "hunspell/wordlist-$fn.zip"
+ zip -9 "hunspell/wordlist-$fn.zip" README.txt "README_$1.txt" "$1.txt"
+ fi
+}
prep
-doit US
-doit CA
+# -all: build every standard and "-large" dictionary with mk-list.
+if [ "$1" = "-all" ]
+then
+
+ doit en_US "mk-list --accents=strip en_US $SIZE"
+ doit en_CA "mk-list --accents=strip en_CA $SIZE"
+ doit en_GB-ize "mk-list --accents=strip en_GB-ize $SIZE"
+ doit en_GB-ise "mk-list --accents=strip en_GB-ise $SIZE"
+ doit en_AU "mk-list --accents=strip en_AU $SIZE"
+
+ doit en_US-large "mk-list -v1 --accents=both en_US 70"
+ doit en_CA-large "mk-list -v1 --accents=both en_CA 70"
+ doit en_GB-large "mk-list -v1 --accents=both en_GB-ize en_GB-ise 70"
+ doit en_AU-large "mk-list -v1 --accents=both en_AU 70"
+
+ sh "$SPELLER/README_en.txt.sh" > hunspell/README
+
+# -one: build a single dictionary; the wordlist command is a bare "cat", so
+# the word list is read from this script's standard input.
+elif [ "$1" = "-one" ] && [ -n "$2" ] && [ -n "$3" ]
+then
+
+ doit "$2" "cat" "$3"
+
+else
+
+ # Misuse: report on stderr and exit non-zero so callers can detect it.
+ echo "usage: $0 -all | -one <dict-name> <parms file>" >&2
+ exit 1
+
+fi
-rm eng*.dat nosug en_US*.? en_CA*.?
+#rm eng*.dat nosug en_US*.? en_CA*.?