4 "[.proteinbin" <- function(x,i,j,drop=FALSE) {
5 ans <- NextMethod("[",drop=drop)
6 class(ans) <- "proteinbin"
10 as.matrix.proteinbin <- function(x,...) {
11 if (is.matrix(x)) return(x)
13 dim(x) <- c(1, length(x))
16 s <- unique(unlist(lapply(x, length)))
18 stop("Protein sequences in list not of the same length.")
21 y <- matrix(as.raw(0), n, s)
22 for (i in seq_len(n)) y[i, ] <- x[[i]]
24 class(y) <- "proteinbin"
28 as.list.proteinbin <- function(x, ...)
30 if (is.list(x)) return(x)
31 if (is.null(dim(x))) obj <- list(x) # cause is.vector() doesn't work
34 obj <- vector("list", n)
35 for (i in 1:n) obj[[i]] <- x[i, ]
36 names(obj) <- rownames(x)
38 class(obj) <- "proteinbin"
42 rbind.proteinbin <- function(...)
43 ### works only with matrices for the moment
47 if (n == 1) return(obj[[1]])
49 if (!is.matrix(obj[[i]]))
50 stop("the 'rbind' method for \"proteinbin\" accepts only matrices")
51 NC <- unlist(lapply(obj, ncol))
52 if (length(unique(NC)) > 1)
53 stop("matrices do not have the same number of columns.")
54 for (i in 1:n) class(obj[[i]]) <- NULL
55 for (i in 2:n) obj[[1]] <- rbind(obj[[1]], obj[[i]])
56 structure(obj[[1]], class = "proteinbin")
60 function(..., check.names = TRUE, fill.with.gaps = FALSE,
62 ### works only with matrices for the moment
66 if (n == 1) return(obj[[1]])
68 if (!is.matrix(obj[[i]]))
69 stop("the 'cbind' method for \"proteinbin\" accepts only matrices")
70 NR <- unlist(lapply(obj, nrow))
71 for (i in 1:n) class(obj[[i]]) <- NULL
73 nms <- unlist(lapply(obj, rownames))
75 NC <- unlist(lapply(obj, ncol))
77 ans <- matrix(as.raw(4), length(nms), sum(NC))
81 to <- from + NC[i] - 1
82 tmp <- rownames(obj[[i]])
83 nmsi <- tmp[tmp %in% nms]
84 ans[nmsi, from:to] <- obj[[i]][nmsi, , drop = FALSE]
90 nms <- names(tab)[which(ubi)]
91 ans <- obj[[1]][nms, , drop = FALSE]
93 ans <- cbind(ans, obj[[i]][nms, , drop = FALSE])
94 if (!quiet && !all(ubi))
95 warning("some rows were dropped.")
98 if (length(unique(NR)) > 1)
99 stop("matrices do not have the same number of rows.")
100 ans <- matrix(unlist(obj), NR)
101 rownames(ans) <- rownames(obj[[1]])
103 class(ans) <- "proteinbin"
107 c.proteinbin <- function(..., recursive = FALSE)
109 if (!all(unlist(lapply(list(...), is.list))))
110 stop("the 'c' method for \"proteinbin\" accepts only lists")
111 structure(NextMethod("c"), class = "proteinbin")
114 print.proteinbin <- function(x, printlen = 6, digits = 3, ...)
120 cat("1 Protein sequence stored in a list.\n\n")
121 nTot <- length(x[[1]])
122 cat("Sequence length:", nTot, "\n\n")
123 cat("Label:", nms, "\n\n")
125 cat(n, "Protein sequences stored in a list.\n\n")
126 tmp <- unlist(lapply(x, length))
132 cat("All sequences of same length:", maxi, "\n")
134 cat("Mean sequence length:", round(mean(tmp), 3), "\n")
135 cat(" Shortest sequence:", mini, "\n")
136 cat(" Longest sequence:", maxi, "\n")
140 nms <- nms[1:printlen]
143 cat("\nLabels:", paste(nms, collapse = " "), TAIL)
150 cat(nd[1], "Protein sequences stored in a matrix.\n\n")
151 cat("All sequences of same length:", nd[2], "\n")
153 if (printlen < nd[1]) {
154 nms <- nms[1:printlen]
157 cat("\nLabels:", paste(nms, collapse = " "), TAIL)
159 cat("1 Protein sequence in binary format stored in a vector.\n\n")
160 cat("Sequence length:", nTot, "\n\n")
164 cat("Base composition:\n")
165 print(round(base.freq(x), digits))
166 } else cat("More than 1 million nucleotides: not printing base composition\n")
169 as.proteinbin <- function(x, ...) UseMethod("as.proteinbin")
172 c("A","B","C","D","E","F","G","H","I","K","L","M","N","P",
173 "Q","R","S","T","V","W","X","Y","Z","-")
178 ### from http://en.wikipedia.org/wiki/Amino_acid and http://www.bioinformatics.org/sms2/iupac.html
207 "B"="Apartic acid or Aspargine",
227 "X"="Any amino acid",
228 "Z"="Glutamine or Glutamic acid",
229 "-"="Gap in alignment")
231 as.proteinbin.character <- function(x, ...)
234 if (any(nchar(x)>1)) {
236 stop("Cannot convert a matrix of multiple characters into a sequence object")
238 x <- do.call(rbind,strsplit(x,""))
241 for(alias in names(._letters_alias_)) {
242 ans[ans==alias] <- ._letters_alias_[[alias]]
244 if (any(!(ans %in% ._letters_))) {
245 stop("invalid characters for protein sequence")
250 dimnames(ans) <- dimnames(x)
252 class(ans) <- "proteinbin"
257 as.proteinbin.list <- function(x, ...)
259 obj <- lapply(x, as.proteinbin)
260 class(obj) <- "proteinbin"
264 ## they're already characters
265 as.character.proteinbin <- function(x, ...)
270 if (is.list(x)) lapply(x, f) else f(x)
273 base.freq <- function(x, freq = FALSE, all = FALSE)
276 ans <- table(x,deparse.level=0)
280 BF <- rowSums(sapply(x, f))
281 n <- sum(sapply(x, length))
288 if (!freq) BF <- BF / n
290 if (!freq) BF <- BF / sum(BF)
296 ### where <- function(x, pattern)
298 ### pat <- as.proteinbin(strsplit(pattern, NULL)[[1]])
299 ### p <- as.integer(length(pat))
301 ### foo <- function(x, pat, p) {
302 ### s <- as.integer(length(x))
303 ### if (s < p) stop("sequence shorter than the pattern")
304 ### ans <- .C("where", x, pat, s, p, integer(s), 0L,
305 ### DUP = FALSE, NAOK = TRUE, PACKAGE = "ape")
307 ### if (n) ans[[5]][seq_len(n)] - p + 2L else integer()
310 ### if (is.list(x)) return(lapply(x, foo, pat = pat, p = p))
311 ### if (is.matrix(x)) {
313 ### res <- vector("list", n)
314 ### for (i in seq_along(n))
315 ### res[[i]] <- foo(x[i, , drop = TRUE], pat, p)
316 ### names(res) <- rownames(x)
319 ### foo(x, pat, p) # if x is a vector