]> git.donarmstrong.com Git - imprinted_genes.git/blobdiff - combine_imprinted_genes.R
sort and add missing genes to the list of imprinted genes
[imprinted_genes.git] / combine_imprinted_genes.R
index 782da8e011fcda6fd2791b1f23b9f1b63079affc..7b75d523fd5cd0bcc440435b5b1ab73491d13e1b 100644 (file)
@@ -3,6 +3,7 @@ library(data.table)
 args <- commandArgs(trailingOnly=TRUE)
 
 geneimprint <- fread(args[1])
+geneimprint <- geneimprint[!grepl(" ",Gene),]
 parent <- fread(args[2])
 ### fix up the 0 prefixed chromosomes
 parent[,chr:=gsub("^0","",chromosome)]
@@ -12,11 +13,25 @@ parent[,Gene:=gsub("\\s*\\([^\\)]+\\)\\s*","",gene)]
 parent[,Gene:=gsub("\\s*,\\s*.+","",Gene)]
 
 parent <- parent[grepl("^[A-Z0-9]+$",Gene),]
+
+
 setkey(parent,"Gene")
 setkey(geneimprint,"Gene")
 
 imprinted.genes <-
-    union(parent[,Gene],geneimprint[,Gene])
+    union(gsub("[\\*\\@]$","",parent[,Gene]),
+          gsub("[\\*\\@]$","",geneimprint[,Gene]))
+
+gene.aliases <- fread(args[3])
+setkey(gene.aliases,"alias")
+
+imprinted.genes <-
+    sort(sapply(imprinted.genes,
+           function(x){if(is.na(gene.aliases[x,gene])) {
+                           return(x)
+                       } else {
+                           return(gene.aliases[x,gene])
+                       }}))
 
 write.table(file=args[length(args)],
             imprinted.genes,