# git.donarmstrong.com Git - imprinted_genes.git/blob - combine_imprinted_genes.R
# add git ignore for downloadable files
# [imprinted_genes.git] / combine_imprinted_genes.R
library(data.table)

### Combine two imprinted-gene tables into one sorted, de-duplicated list of
### gene symbols, replacing known aliases with the symbol given in an alias
### table.
###
### Usage:
###   Rscript combine_imprinted_genes.R GENEIMPRINT PARENT ALIASES [...] OUTPUT
###
###   GENEIMPRINT  table with (at least) the columns Gene and Status
###   PARENT       table with (at least) the columns chromosome and gene
###   ALIASES      table with (at least) the columns alias and gene
###   OUTPUT       file to write, one gene symbol per line (always the last
###                argument)

args <- commandArgs(trailingOnly=TRUE)

### The output path is the *last* argument.  With fewer than four arguments
### it would coincide with one of the input files and overwrite it, so
### refuse to run.
if (length(args) < 4) {
    stop("usage: combine_imprinted_genes.R GENEIMPRINT PARENT ALIASES [...] OUTPUT",
         call.=FALSE)
}

geneimprint <- fread(args[1])
### drop entries whose gene name contains a space
geneimprint <- geneimprint[!grepl(" ",Gene),]

### drop entries whose status is "Not Imprinted" or "Unknown"
geneimprint <- geneimprint[Status!="Not Imprinted",]
geneimprint <- geneimprint[Status!="Unknown",]

parent <- fread(args[2])
### fix up the 0 prefixed chromosomes
parent[,chr:=gsub("^0","",chromosome)]
### remove aliases in ()
parent[,Gene:=gsub("\\s*\\([^\\)]+\\)\\s*","",gene)]
### remove aliases after ,
parent[,Gene:=gsub("\\s*,\\s*.+","",Gene)]

### keep only names made up entirely of upper-case letters and digits
parent <- parent[grepl("^[A-Z0-9]+$",Gene),]

### strip a trailing * or @ marker, then merge both sources
imprinted.genes <-
    union(gsub("[\\*\\@]$","",parent[,Gene]),
          gsub("[\\*\\@]$","",geneimprint[,Gene]))

gene.aliases <- fread(args[3])

### Vectorised alias lookup: for each symbol take the first row of the alias
### table whose alias equals it.  Symbols that are not a known alias, or
### whose alias row has no gene, are kept unchanged.
alias.row <- match(imprinted.genes,gene.aliases[["alias"]])
resolved <- as.character(gene.aliases[["gene"]])[alias.row]
unresolved <- is.na(resolved)
resolved[unresolved] <- imprinted.genes[unresolved]

### several aliases can resolve to the same gene, so de-duplicate again
imprinted.genes <- sort(unique(resolved))

write.table(file=args[length(args)],
            imprinted.genes,
            sep="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)