### git.donarmstrong.com Git - imprinted_genes.git/blob - combine_imprinted_genes.R
### add code to calculate the imprinted genes of mouse
### [imprinted_genes.git] / combine_imprinted_genes.R
library(data.table)

### Combine two imprinted-gene tables into one sorted, de-duplicated list
### of gene symbols, translating known aliases via an alias table.
###
### Usage: Rscript combine_imprinted_genes.R GENEIMPRINT PARENT ALIASES OUTPUT
###   GENEIMPRINT  table with a `Gene` column
###   PARENT       table with `chromosome` and `gene` columns
###   ALIASES      table with `alias` and `gene` columns
###   OUTPUT       file to write, one symbol per line (always the last argument)
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 4) {
    ## With fewer arguments the output path (last argument) would coincide
    ## with one of the input files.
    stop("usage: combine_imprinted_genes.R GENEIMPRINT PARENT ALIASES OUTPUT",
         call. = FALSE)
}

geneimprint <- fread(args[1])
### drop entries whose symbol contains a space
geneimprint <- geneimprint[!grepl(" ", Gene), ]

parent <- fread(args[2])
### fix up the 0 prefixed chromosomes
parent[, chr := gsub("^0", "", chromosome)]
### remove aliases in ()
parent[, Gene := gsub("\\s*\\([^\\)]+\\)\\s*", "", gene)]
### remove aliases after ,
parent[, Gene := gsub("\\s*,\\s*.+", "", Gene)]
### keep only symbols made up entirely of upper-case letters and digits
parent <- parent[grepl("^[A-Z0-9]+$", Gene), ]

### strip a trailing "*" or "@" marker, then merge both sources as a set
strip.marker <- function(symbols) {
    gsub("[\\*\\@]$", "", symbols)
}
imprinted.genes <- union(strip.marker(parent[["Gene"]]),
                         strip.marker(geneimprint[["Gene"]]))

### Translate aliases in a single vectorised lookup. match() returns the
### first matching row, so an alias listed more than once resolves to its
### first mapping instead of producing a length > 1 `if` condition.
gene.aliases <- fread(args[3])
alias.row <- match(imprinted.genes, gene.aliases[["alias"]])
canonical <- as.character(gene.aliases[["gene"]][alias.row])
### symbols without a (non-NA) mapping are kept unchanged
unmapped <- is.na(canonical)
canonical[unmapped] <- imprinted.genes[unmapped]

### several aliases may resolve to the same gene, so de-duplicate again
imprinted.genes <- sort(unique(canonical))

write.table(imprinted.genes,
            file = args[length(args)],
            sep = "\t", row.names = FALSE, col.names = FALSE, quote = FALSE)