#!/usr/bin/env bash

# Helper program names and locations.  SCOWL (root of the SCOWL tree) and
# UNIX2DOS may be overridden from the environment; the defaults below apply
# only when they are unset or empty.
ASPELL=aspell
HUNSPELL=hunspell
# Quoted so the expanded value is never word-split or glob-expanded.
: "${SCOWL:=..}"
SPELLER="$SCOWL/speller"
: "${UNIX2DOS:=unix2dos}"

# Stop at the first failing command.  pipefail makes a pipeline fail when any
# stage fails, so a broken word-list generator feeding "sort" is not ignored.
set -e
set -o pipefail

# Force the C locale so that sort and comm use the same byte-wise ordering.
export LANG=C
export LC_ALL=C
export LC_CTYPE=C
export LC_COLLATE=C

# Last argument given to mk-list for the standard (non "-large") dictionaries.
SIZE=60

# Invoke $SCOWL/mk-list with "-d $SCOWL/final" followed by the caller's
# arguments, passed through unchanged.  Paths are quoted so a SCOWL location
# containing whitespace or glob characters still works.
mk-list() {
  "$SCOWL/mk-list" -d "$SCOWL/final" "$@"
}

# Write ./nosug: the sorted, de-duplicated union of the offensive.1,
# offensive.2 and profane.1 lists under $SCOWL/misc.  doit() compares each
# word list against this file with comm.
#
# sort reads the files directly (instead of "cat ... | sort") so that a
# missing or unreadable input makes prep return non-zero rather than quietly
# producing an incomplete list.
prep() {
  echo prep

  sort -u -- "$SCOWL"/misc/{offensive.1,offensive.2,profane.1} > nosug
}

# Build one Hunspell dictionary in the current directory and zip it.
#
# Arguments:
#   $1 - dictionary name; used as the stem of every generated file
#   $2 - command string (run with eval) that prints the word list on stdout
#   $3 - optional file appended to README_$1.txt instead of the
#        "Wordlist Command" line; when given, the hunspell/ copy and the
#        plain word-list zip are skipped
# Globals read: SPELLER, SCOWL_VERSION (optional), UNIX2DOS
# Files read:   ./nosug (written by prep), $SPELLER/en.aff,
#               $SPELLER/en.dic.supp, $SPELLER/README_en.txt.sh
# Files written: $1.0 $1.1 $1-nosug.1 $1.2 $1.dic $1.aff README_$1.txt
#               hunspell-<fn>.zip and, without $3, $1.tocheck $1.txt
#               README.txt hunspell/hunspell-<fn>.zip
#               hunspell/wordlist-<fn>.zip
doit() {
  local name=$1 list_cmd=$2 readme_extra=${3-}
  local readme="README_$name.txt"
  local fn

  echo "creating $name.dic"

  # eval is needed because callers hand over the generator as one string.
  eval "$list_cmd" | sort -u > "$name.0"

  # Words also present in nosug go to -nosug.1, all other words to .1.
  comm -12 "$name.0" nosug > "$name-nosug.1"
  comm -23 "$name.0" nosug > "$name.1"

  # Only the nosug subset is additionally piped through add-no-suggest.
  "$SPELLER/munch-list" munch "$SPELLER/en.aff" < "$name-nosug.1" \
    | "$SPELLER/add-no-suggest" > "$name.2"
  "$SPELLER/munch-list" munch "$SPELLER/en.aff" < "$name.1" >> "$name.2"
  cat "$SPELLER/en.dic.supp" >> "$name.2"

  # The .dic starts with the entry count, followed by the sorted entries
  # converted from ISO-8859-1 to UTF-8.
  wc -l < "$name.2" | tr -d '[:blank:]' > "$name.dic"
  sort "$name.2" | iconv -f iso-8859-1 -t utf-8 >> "$name.dic"

  cp "$SPELLER/en.aff" "$name.aff"

  # Archive names carry the SCOWL version when one is set.
  if [[ -n "${SCOWL_VERSION-}" ]]; then
    fn="$name-$SCOWL_VERSION"
  else
    fn=$name
  fi

  WHAT="$name Hunspell Dictionary" sh "$SPELLER/README_en.txt.sh" > "$readme"
  if [[ -z "$readme_extra" ]]; then
    printf 'Wordlist Command: %s\n' "$list_cmd" >> "$readme"
  else
    cat -- "$readme_extra" >> "$readme"
  fi

  # Remove any old archive first so zip creates it from scratch.
  rm -f "hunspell-$fn.zip"
  zip -9 "hunspell-$fn.zip" "$readme" "$name.dic" "$name.aff"

  if [[ -z "$readme_extra" ]]; then
    mkdir -p hunspell
    cp "hunspell-$fn.zip" hunspell/

    #echo check

    sort -u "$name-nosug.1" "$name.1" > "$name.tocheck"

    #hunspell -l -d ./$1 < $1.dic.tocheck > misspelled

    # UNIX2DOS is intentionally unquoted so an override may include options.
    # shellcheck disable=SC2086
    iconv -f iso-8859-1 -t utf-8 < "$name.tocheck" | $UNIX2DOS > "$name.txt"
    cat <<EOF > README.txt
This zip file contains the words found in the corresponding Hunspell
dictionary.  See the file README_$name.txt.
EOF
    zip -9 "hunspell/wordlist-$fn.zip" README.txt "$readme" "$name.txt"
  fi
}

# Entry point.
#   -all                           build every standard and "-large" dictionary
#   -one <dict-name> <parms file>  build a single dictionary; the word list is
#                                  read from stdin and <parms file> is appended
#                                  to its README
# prep runs only after the arguments are validated, so a usage error neither
# writes ./nosug nor aborts on a missing SCOWL tree before usage is shown.
if [[ "${1-}" == "-all" ]]; then

  prep

  for dict in en_US en_CA en_GB-ize en_GB-ise en_AU; do
    doit "$dict" "mk-list --accents=strip $dict $SIZE"
  done

  doit en_US-large "mk-list -v1 --accents=both en_US 70"
  doit en_CA-large "mk-list -v1 --accents=both en_CA 70"
  doit en_GB-large "mk-list -v1 --accents=both en_GB-ize en_GB-ise 70"
  doit en_AU-large "mk-list -v1 --accents=both en_AU 70"

  sh "$SPELLER/README_en.txt.sh" > hunspell/README

elif [[ "${1-}" == "-one" && -n "${2-}" && -n "${3-}" ]]; then

  prep

  doit "$2" "cat" "$3"

else

  # Diagnostics go to stderr, and a usage error is reported as a failure.
  echo "usage: $0 -all | -one <dict-name> <parms file>" >&2
  exit 2

fi

#rm eng*.dat nosug en_US*.? en_CA*.?