#' plot.clustal #' library(grid) plot.clustal <- function(clustal,tree=NULL,refseq=1,refseq.start=1) { string.width <- function(x) { sapply(x,function(y){convertX(unit(1,"strwidth",y),"mm", valueOnly=TRUE)}) } ## http://www.jalview.org/help/html/colourSchemes/clustal.html ## https://www.cgl.ucsf.edu/chimera/docs/ContributedSoftware/multalignviewer/colprot.par calculate.color.column <- function(x) { ratio.true <- function(x){sum(x)/length(x)} colors <- rep(NA,length.out=length(x)) clustal.colors <- list("red" = rgb(0.9, 0.2, 0.1, 1), "blue" = rgb(0.1, 0.5, 0.9, 1), "green" = rgb(0.1, 0.8, 0.1, 1), "cyan" = rgb(0.1, 0.7, 0.7, 1), "pink" = rgb(0.9, 0.5, 0.5, 1), "magenta"= rgb(0.8, 0.3, 0.8, 1), "yellow" = rgb(0.69, 0.69, 0.0, 1), "orange" = rgb(0.9, 0.6, 0.3, 1)) colors[x=="G"] <- "orange" colors[x=="P"] <- "yellow" ## identify consensus consensus <- " " tests <- list("%"=list(aa=c("W","L","V","I","M","A","F","C","Y","H","P"), min=0.6), "#"=list(aa=c("W","L","V","I","M","A","F","C","Y","H","P"), min=0.8), "-"=list(aa=c("E","D"), min=0.5), "+"=list(aa=c("K","R"), min=0.6), "g"=list(aa="G", min=0.5), "n"=list(aa="N", min=0.5), "q"=list(aa=c("Q","E"), min=0.5), "p"=list(aa="P", min=0.5), "t"=list(aa=c("T","S"), min=0.5)) for (symbol in c("A","C","D","E","F","G","H","I","K","L","M","N","P", "Q","R","S","T","V","W","Y")) { tests[[symbol]] <- list(aa=symbol,min=0.85) } for (test in names(tests)) { if (ratio.true(x %in% tests[[test]][["aa"]]) >= tests[[test]][["min"]]) { consensus <- test } } ## handle colors color.tests <- list("G"=list(color="orange", consensus=NULL), "P"=list(color="yellow", consensus=NULL), "T"=list(color="green", consensus=c("t","S","T","%","#")), "S"=list(color="green", consensus=c("t","S","T","#")), "N"=list(color="green", consensus=c("n","N","D")), "Q"=list(color="green", consensus=c("q","Q","E","+","K","R")), "W"=list(color="blue", consensus=c("%","#","A","C","F","H","I","L","M","V","W","Y","P","p")), "L"=list(color="blue", consensus=c("%","#","A","C","F","H","I","L","M","V","W","Y","P","p")), "V"=list(color="blue", consensus=c("%","#","A","C","F","H","I","L","M","V","W","Y","P","p")), "I"=list(color="blue", consensus=c("%","#","A","C","F","H","I","L","M","V","W","Y","P","p")), "M"=list(color="blue", consensus=c("%","#","A","C","F","H","I","L","M","V","W","Y","P","p")), "A"=list(color="blue", consensus=c("%","#","A","C","F","H","I","L","M","V","W","Y","P","p", "T","S","s","G")), "F"=list(color="blue", consensus=c("%","#","A","C","F","H","I","L","M","V","W","Y","P","p")), "C"=list(color="blue", consensus=c("%","#","A","F","H","I","L","M","V","W","Y","S","P","p")), "C1"=list(color="pink", consensus="C", aa="C"), "H"=list(color="cyan", consensus=c("%","#","A","C","F","H","I","L","M","V","W","Y","P","p")), "Y"=list(color="cyan", consensus=c("%","#","A","C","F","H","I","L","M","V","W","Y","P","p")), "E"=list(color="magenta", consensus=c("-","D","E","q","Q")), "D"=list(color="magenta", consensus=c("-","D","E","q","Q")), "K"=list(color="magenta", consensus=c("+","K","R","Q")), "R"=list(color="magenta", consensus=c("+","K","R","Q")) ) for (test in names(color.tests)) { aa <- test if (!is.null(color.tests[[test]]$aa)) aa <- color.tests[[test]]$aa colors[x==aa & all(consensus %in% color.tests[[test]][["consensus"]])] <- clustal.colors[[color.tests[[test]][["color"]]]] } return(colors) } name.spacing <- string.width(" ") symbol.spacing <- string.width(" ")/4 clustal.names <- rownames(clustal) clustal.names.width <- string.width(clustal.names) ## find max character width in each string column symbol.widths <- rep(max(apply(clustal,2,function(x){max(string.width(x))})), ncol(clustal)) full.width <- convertX(unit(sum(symbol.widths)+ name.spacing+max(clustal.names.width),"mm"), "npc",valueOnly=TRUE) symbol.pos.x <- cumsum(symbol.spacing+symbol.widths) symbol.pos.x <- c(0,symbol.pos.x[-length(symbol.pos.x)])+symbol.widths/2+symbol.spacing ## going to be wider than the screen if (full.width > 1) { clustal.names.x <- unit(0,"npc") symbol.offset.x <- unit(max(c(convertX(unit(1,"npc"),"mm",valueOnly=TRUE)- sum(symbol.widths)+name.spacing,0)),"mm") } else { clustal.names.x <- unit((1-full.width)/2,"npc")+unit(max(clustal.names.width)-clustal.names.width,"mm") symbol.offset.x <- unit((1-full.width)/2,"npc")+unit(max(clustal.names.width)+name.spacing,"mm") } ## plot the symbol names line.pos.y <- unit(seq((length(clustal.names)-1)/2, -(length(clustal.names)-1)/2, length.out=length(clustal.names)),"lines")+ unit(0.5,"npc") ## figure out X width; if it's bigger than the plotting area, jam ## the names in anyway grid.text(clustal.names, y=line.pos.y, x=clustal.names.x,hjust=0) grid.rect(x=unit(rep(symbol.pos.x,each=nrow(clustal)), "mm")+symbol.offset.x, y=rep(line.pos.y,times=ncol(clustal)), width=unit(symbol.widths+symbol.spacing,"mm"), height=unit(1,"line"), gp=gpar(col=NA, fill=as.vector(apply(clustal,2,calculate.color.column)))) ## plot the protein characters grid.text(as.vector(clustal), y=rep(line.pos.y,times=ncol(clustal)), x=unit(rep(symbol.pos.x,each=nrow(clustal)), "mm")+symbol.offset.x,hjust=0.5) ## plot the position strings position.strings <- seq(refseq.start,by=1,length.out=ncol(clustal)) if(length(position.strings) < 10) { wanted.position.strings <- position.strings %% 2 == 0 } else { wanted.position.strings <- position.strings %% 5 == 0 } position.strings <- as.character(position.strings) position.strings[!wanted.position.strings] <- "" grid.text(position.strings, y=unit((length(clustal.names)-1)/2+1,"lines")+unit(0.5,"npc"), x=unit(symbol.pos.x,"mm")+symbol.offset.x, rot=90 ) }