]> git.donarmstrong.com Git - plotprotein.git/commitdiff
add plot clustal code
authorDon Armstrong <don@donarmstrong.com>
Wed, 1 Oct 2014 00:20:39 +0000 (17:20 -0700)
committerDon Armstrong <don@donarmstrong.com>
Wed, 1 Oct 2014 00:20:39 +0000 (17:20 -0700)
R/plot.clustal.R [new file with mode: 0644]

diff --git a/R/plot.clustal.R b/R/plot.clustal.R
new file mode 100644 (file)
index 0000000..83c9b94
--- /dev/null
@@ -0,0 +1,183 @@
+#' plot.clustal
+#'
+library(grid)
+
+plot.clustal <- function(clustal,tree=NULL,refseq=1,refseq.start=1) {
+    string.width <-
+        function(x) {
+            sapply(x,function(y){convertX(unit(1,"strwidth",y),"mm",
+                                          valueOnly=TRUE)})
+        }
+
+    ## http://www.jalview.org/help/html/colourSchemes/clustal.html
+    ## https://www.cgl.ucsf.edu/chimera/docs/ContributedSoftware/multalignviewer/colprot.par
+    calculate.color.column <-
+        function(x) {
+            ratio.true <- function(x){sum(x)/length(x)}
+            colors <- rep(NA,length.out=length(x))
+            clustal.colors <-
+                list("red"    = rgb(0.9,  0.2,  0.1, 1),
+                     "blue"   = rgb(0.1,  0.5,  0.9, 1),
+                     "green"  = rgb(0.1,  0.8,  0.1, 1),
+                     "cyan"   = rgb(0.1,  0.7,  0.7, 1),
+                     "pink"   = rgb(0.9,  0.5,  0.5, 1),
+                     "magenta"= rgb(0.8,  0.3,  0.8, 1),
+                     "yellow" = rgb(0.69, 0.69, 0.0, 1),
+                     "orange" = rgb(0.9,  0.6,  0.3, 1))
+            colors[x=="G"] <- "orange"
+            colors[x=="P"] <- "yellow"
+            ## identify consensus
+            consensus <- " "
+            tests <-
+                list("%"=list(aa=c("W","L","V","I","M","A","F","C","Y","H","P"),
+                         min=0.6),
+                     "#"=list(aa=c("W","L","V","I","M","A","F","C","Y","H","P"),
+                         min=0.8),
+                     "-"=list(aa=c("E","D"),
+                         min=0.5),
+                     "+"=list(aa=c("K","R"),
+                         min=0.6),
+                     "g"=list(aa="G",
+                         min=0.5),
+                     "n"=list(aa="N",
+                         min=0.5),
+                     "q"=list(aa=c("Q","E"),
+                         min=0.5),
+                     "p"=list(aa="P",
+                         min=0.5),
+                     "t"=list(aa=c("T","S"),
+                         min=0.5))
+            for (symbol in c("A","C","D","E","F","G","H","I","K","L","M","N","P",
+                         "Q","R","S","T","V","W","Y")) {
+                tests[[symbol]] <- list(aa=symbol,min=0.85)
+            }
+            for (test in names(tests)) {
+                if (ratio.true(x %in% tests[[test]][["aa"]]) >=
+                    tests[[test]][["min"]]) {
+                    consensus <- test
+                }
+            }
+            ## handle colors
+            color.tests <-
+                list("G"=list(color="orange",
+                         consensus=NULL),
+                     "P"=list(color="yellow",
+                         consensus=NULL),
+                     "T"=list(color="green",
+                         consensus=c("t","S","T","%","#")),
+                     "S"=list(color="green",
+                         consensus=c("t","S","T","#")),
+                     "N"=list(color="green",
+                         consensus=c("n","N","D")),
+                     "Q"=list(color="green",
+                         consensus=c("q","Q","E","+","K","R")),
+                     "W"=list(color="blue",
+                         consensus=c("%","#","A","C","F","H","I","L","M","V","W","Y","P","p")),
+                     "L"=list(color="blue",
+                         consensus=c("%","#","A","C","F","H","I","L","M","V","W","Y","P","p")),
+                     "V"=list(color="blue",
+                         consensus=c("%","#","A","C","F","H","I","L","M","V","W","Y","P","p")),
+                     "I"=list(color="blue",
+                         consensus=c("%","#","A","C","F","H","I","L","M","V","W","Y","P","p")),
+                     "M"=list(color="blue",
+                         consensus=c("%","#","A","C","F","H","I","L","M","V","W","Y","P","p")),
+                     "A"=list(color="blue",
+                         consensus=c("%","#","A","C","F","H","I","L","M","V","W","Y","P","p",
+                             "T","S","s","G")),
+                     "F"=list(color="blue",
+                         consensus=c("%","#","A","C","F","H","I","L","M","V","W","Y","P","p")),
+                     "C"=list(color="blue",
+                         consensus=c("%","#","A","F","H","I","L","M","V","W","Y","S","P","p")),
+                     "C1"=list(color="pink",
+                         consensus="C",
+                         aa="C"),
+                     "H"=list(color="cyan",
+                         consensus=c("%","#","A","C","F","H","I","L","M","V","W","Y","P","p")),
+                     "Y"=list(color="cyan",
+                         consensus=c("%","#","A","C","F","H","I","L","M","V","W","Y","P","p")),
+                     "E"=list(color="magenta",
+                         consensus=c("-","D","E","q","Q")),
+                     "D"=list(color="magenta",
+                         consensus=c("-","D","E","q","Q")),
+                     "K"=list(color="magenta",
+                         consensus=c("+","K","R","Q")),
+                     "R"=list(color="magenta",
+                         consensus=c("+","K","R","Q"))
+                     )
+            for (test in names(color.tests)) {
+                aa <- test
+                if (!is.null(color.tests[[test]]$aa))
+                    aa <- color.tests[[test]]$aa
+                colors[x==aa & all(consensus %in% color.tests[[test]][["consensus"]])] <-
+                    clustal.colors[[color.tests[[test]][["color"]]]]
+            }
+            return(colors)
+        }
+            
+    name.spacing <- string.width("  ")
+    symbol.spacing <- string.width(" ")/4
+    clustal.names <- rownames(clustal)
+    clustal.names.width <-
+        string.width(clustal.names)
+    ## find max character width in each string column
+    symbol.widths <- rep(max(apply(clustal,2,function(x){max(string.width(x))})),
+                         ncol(clustal))
+    full.width <- convertX(unit(sum(symbol.widths)+
+                                    name.spacing+max(clustal.names.width),"mm"),
+                           "npc",valueOnly=TRUE)
+    symbol.pos.x <- cumsum(symbol.spacing+symbol.widths)
+    symbol.pos.x <- c(0,symbol.pos.x[-length(symbol.pos.x)])+symbol.widths/2+symbol.spacing
+    ## going to be wider than the screen
+    if (full.width > 1) {
+        clustal.names.x <-
+            unit(0,"npc")
+        symbol.offset.x <-
+            unit(max(c(convertX(unit(1,"npc"),"mm",valueOnly=TRUE)-
+                           sum(symbol.widths)+name.spacing,0)),"mm")
+    } else {
+        clustal.names.x <-
+            unit((1-full.width)/2,"npc")+unit(max(clustal.names.width)-clustal.names.width,"mm")
+        symbol.offset.x <-
+            unit((1-full.width)/2,"npc")+unit(max(clustal.names.width)+name.spacing,"mm")
+    }
+    
+    ## plot the symbol names
+    line.pos.y <-
+        unit(seq((length(clustal.names)-1)/2,
+                 -(length(clustal.names)-1)/2,
+                 length.out=length(clustal.names)),"lines")+
+                     unit(0.5,"npc")
+    
+
+    ## figure out X width; if it's bigger than the plotting area, jam
+    ## the names in anyway
+    grid.text(clustal.names,
+              y=line.pos.y,
+              x=clustal.names.x,hjust=0)
+    grid.rect(x=unit(rep(symbol.pos.x,each=nrow(clustal)),
+                  "mm")+symbol.offset.x,
+              y=rep(line.pos.y,times=ncol(clustal)),
+              width=unit(symbol.widths+symbol.spacing,"mm"),
+              height=unit(1,"line"),
+              gp=gpar(col=NA,
+                      fill=as.vector(apply(clustal,2,calculate.color.column))))
+    ## plot the protein characters
+    grid.text(as.vector(clustal),
+              y=rep(line.pos.y,times=ncol(clustal)),
+              x=unit(rep(symbol.pos.x,each=nrow(clustal)),
+                  "mm")+symbol.offset.x,hjust=0.5)
+    ## plot the position strings
+    position.strings <- seq(refseq.start,by=1,length.out=ncol(clustal))
+    if(length(position.strings) < 10) {
+        wanted.position.strings <- postiion.strings %% 2 == 0
+    } else {
+        wanted.position.strings <- position.strings %% 5 == 0
+    }
+    position.strings <- as.character(position.strings)
+    position.strings[!wanted.position.strings] <- ""
+    grid.text(position.strings,
+              y=unit((length(clustal.names)-1)/2+1,"lines")+unit(0.5,"npc"),
+              x=unit(symbol.pos.x,"mm")+symbol.offset.x,
+              rot=90
+              )
+}