X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=speller%2Fmake-hunspell-dict;h=3d02ddaf1c0f83738eea61fb1744eeea4cd00cbf;hb=84671135633ddb6d4b435bbe71fe3c6db4848e54;hp=c09108382d409cf639bc46a9ee5cce257f4bf11f;hpb=adbd0c3d3296ead6c49b4f13012aca4a558efc7f;p=deb_pkgs%2Fscowl.git diff --git a/speller/make-hunspell-dict b/speller/make-hunspell-dict index c091083..3d02dda 100755 --- a/speller/make-hunspell-dict +++ b/speller/make-hunspell-dict @@ -1,7 +1,10 @@ -#!/bin/bash +#!/usr/bin/env bash ASPELL=aspell HUNSPELL=hunspell +: ${SCOWL:=..} +SPELLER="$SCOWL/speller" +: ${UNIX2DOS:=unix2dos} set -e @@ -10,60 +13,98 @@ export LC_ALL=C export LC_CTYPE=C export LC_COLLATE=C -prep() { - echo prep +SIZE=60 - cat misc/{offensive.1,offensive.2,profane.1} | sort -u > nosug +mk-list() { $SCOWL/mk-list -d $SCOWL/final "$@"; } - cp en.aff eng_affix.dat +prep() { + echo prep - cat << EOF > eng.dat -name eng -charset iso8859-1 -special ' -*- -affix eng -EOF + cat $SCOWL/misc/{offensive.1,offensive.2,profane.1} | sort -u > nosug } doit() { - echo creating en_$1.dic + echo creating $1.dic - cat en-common.wl en_$1-wo_accents-only.wl | sort -u > en_$1.0 + eval $2 | sort -u > $1.0 - comm en_$1.0 nosug -12 > en_$1-nosug.1 + comm -12 $1.0 nosug > $1-nosug.1 - comm en_$1.0 nosug -23 > en_$1.1 + comm -23 $1.0 nosug > $1.1 - $ASPELL -l ./eng munch-list < en_$1-nosug.1 | grep -v '^\(XXX\|>>>\)' | ./add-no-suggest > en_$1.2 + $SPELLER/munch-list munch $SPELLER/en.aff < $1-nosug.1 | $SPELLER/add-no-suggest > $1.2 - $ASPELL -l ./eng munch-list < en_$1.1 | grep -v '^\(XXX\|>>>\)' >> en_$1.2 + $SPELLER/munch-list munch $SPELLER/en.aff < $1.1 >> $1.2 - cat en.dic.supp >> en_$1.2 + cat $SPELLER/en.dic.supp >> $1.2 - wc -l en_$1.2 | cut -d' ' -f1 > en_$1.dic - cat en_$1.2 | sort >> en_$1.dic + wc -l < $1.2 | tr -d '[:blank:]' > $1.dic + cat $1.2 | sort | iconv -f iso-8859-1 -t utf-8 >> $1.dic - cp en.aff en_$1.aff + cp $SPELLER/en.aff $1.aff - cat README_en.txt.in ../Copyright > README_en_$1.txt - echo >> README_en_$1.txt - echo "Build Date: `date`" >> README_en_$1.txt + if [ "$SCOWL_VERSION" ]; then + fn="$1-$SCOWL_VERSION" + else + fn="$1" + fi - zip -9 en_$1.zip README_en_$1.txt en_$1.dic en_$1.aff + WHAT="$1 Hunspell Dictionary" sh $SPELLER/README_en.txt.sh > README_$1.txt + if [ -z "$3" ]; then + echo "Wordlist Command: $2" >> README_$1.txt + else + cat $3 >> README_$1.txt + fi - cp -p en_$1.zip hunspell/ + rm -f hunspell-$fn.zip + zip -9 hunspell-$fn.zip README_$1.txt $1.dic $1.aff - #echo check + if [ -z "$3" ]; then + mkdir -p hunspell + cp hunspell-$fn.zip hunspell/ - cat en_$1-nosug.1 en_$1.1 | sort -u > en_$1.tocheck + #echo check - #hunspell -l -d ./en_$1 < en_$1.dic.tocheck > misspelled -} + cat $1-nosug.1 $1.1 | sort -u > $1.tocheck + + #hunspell -l -d ./$1 < $1.dic.tocheck > misspelled + cat $1.tocheck | iconv -f iso-8859-1 -t utf-8 | $UNIX2DOS > $1.txt + cat < README.txt +This zip file contains the words found in the corresponding Hunspell +dictionary. See the file README_$1.txt. +EOF + zip -9 hunspell/wordlist-$fn.zip README.txt README_$1.txt $1.txt + fi +} prep -doit US -doit CA +if [ "$1" = "-all" ] +then + + doit en_US "mk-list --accents=strip en_US $SIZE" + doit en_CA "mk-list --accents=strip en_CA $SIZE" + doit en_GB-ize "mk-list --accents=strip en_GB-ize $SIZE" + doit en_GB-ise "mk-list --accents=strip en_GB-ise $SIZE" + doit en_AU "mk-list --accents=strip en_AU $SIZE" + + doit en_US-large "mk-list -v1 --accents=both en_US 70" + doit en_CA-large "mk-list -v1 --accents=both en_CA 70" + doit en_GB-large "mk-list -v1 --accents=both en_GB-ize en_GB-ise 70" + doit en_AU-large "mk-list -v1 --accents=both en_AU 70" + + sh $SPELLER/README_en.txt.sh > hunspell/README + +elif [ "$1" = "-one" -a -n "$2" -a -n "$3" ] +then + + doit $2 "cat" $3 + +else + + echo "usage: $0 -all | -one " + +fi -rm eng*.dat nosug en_US*.? en_CA*.? +#rm eng*.dat nosug en_US*.? en_CA*.?