#' Read a file in clustal format
#'
#' \code{read.clustal} reads an alignment file in clustal format.
#'
#' The first line of the file must start with \code{CLUSTAL}. The number of
#' sequences is taken from the position of the first line that begins with
#' only spaces in the sequence-name column (the conservation line that
#' closes the first block). An optional cumulative residue count at the end
#' of a sequence line is discarded.
#'
#' @param file file name of the clustal alignment
#' @param ... currently ignored
#' @return the result of \code{as.proteinbin} applied to an upper-case
#'   character matrix with one row per sequence (row names are the sequence
#'   names) and one column per alignment position
#' @export
#' @examples
#' \dontrun{
#' clust.align <- read.clustal("clustal_alignment.txt")
#' }
read.clustal <- function(file,...) {
    ## stolen from ape's read.dna.R
    findFirstSeq <- function(x) {
        ## position of the character following the first run of blanks
        ## in the first line (i.e. the column where the residues start)
        tmp <- regexpr("[[:blank:]]+", x[1]) # consider only a single string
        tmp[1] + attr(tmp, "match.length")
    }
    getSeq <- function(x) {
        ## drop the optional cumulative residue count that may end a line,
        ## otherwise its digits would be read as residues
        x <- sub("[[:blank:]]+[0-9]+[[:blank:]]*$", "", x)
        x <- gsub(" ", "", x)
        toupper(unlist(strsplit(x, NULL)))
    }

    ## scan() with sep = "\n" skips empty lines, so blocks follow each other
    ## directly in X
    X <- scan(file = file, what = "", sep = "\n", quiet = TRUE)
    if (!all(grepl("^CLUSTAL",X[1])))
        stop("Doesn't appear to be a file in clustal format")
    ## The first line contains CLUSTAL, and isn't interesting
    X <- X[-1]
    start.seq <- findFirstSeq(X)
    ## no blank after the first sequence name (or no sequence line at all)
    if (is.na(start.seq) || start.seq < 2)
        stop("Could not locate the start of the sequences in the clustal file")
    ## now, find how many sequences there are: the first line whose name
    ## column is made of spaces only closes the first block
    leading.spaces <- paste0("^ {", start.seq - 1, "}")
    stars <- grep(leading.spaces, X)
    if (length(stars) == 0L || stars[1] < 2L)
        stop("Could not determine the number of sequences in the clustal file")
    num.seq <- stars[1] - 1
    taxa <- gsub(" *$", "", substr(X[seq_len(num.seq)], 1, start.seq - 1))
    ## remove the sequence names
    X <- substr(X, start.seq, nchar(X))
    ## number of lines of sequences
    nl <- length(X)
    ## every block holds num.seq sequence lines plus one conservation line,
    ## so sequence i sits on lines i, i + (num.seq + 1), ...
    first.seq <- getSeq(X[seq(1, nl, num.seq + 1)])
    seqs <- matrix("", num.seq, length(first.seq))
    seqs[1, ] <- first.seq
    ## seq_len(...)[-1] is empty for a single sequence (2:1 would not be)
    for (i in seq_len(num.seq)[-1]) {
        seqs[i, ] <- getSeq(X[seq(i, nl, num.seq + 1)])
    }
    rownames(seqs) <- taxa
    as.proteinbin(seqs)
}