### combine_imprinted_genes.R
### Source: git.donarmstrong.com, imprinted_genes.git
### (blob 782da8e011fcda6fd2791b1f23b9f1b63079affc)
library(data.table)

### Combine the gene symbols from two imprinted-gene tables into a
### single de-duplicated list, written one symbol per line.
###
### Usage: Rscript combine_imprinted_genes.R GENEIMPRINT PARENT OUTPUT
###   GENEIMPRINT  table read with fread(); needs a "Gene" column
###   PARENT       table read with fread(); needs "gene" and
###                "chromosome" columns
###   OUTPUT       file the combined list is written to (always taken
###                from the last argument)

args <- commandArgs(trailingOnly=TRUE)

### the output path is the last argument, so with fewer than three
### arguments it would be one of the input files and the write at the
### end would overwrite it
if (length(args) < 3) {
    stop("usage: combine_imprinted_genes.R GENEIMPRINT PARENT OUTPUT",
         call.=FALSE)
}

geneimprint <- fread(args[1])
parent <- fread(args[2])
### fix up the 0 prefixed chromosomes
### (chr is added to the table but is not part of the output below)
parent[,chr:=gsub("^0","",chromosome)]
### remove aliases in ()
parent[,Gene:=gsub("\\s*\\([^\\)]+\\)\\s*","",gene)]
### remove aliases after ,
parent[,Gene:=gsub("\\s*,\\s*.+","",Gene)]

### keep only the rows whose cleaned-up name matches ^[A-Z0-9]+$
parent <- parent[grepl("^[A-Z0-9]+$",Gene),]
### keying sorts each table by Gene, which fixes the order of the
### names in the union
setkey(parent,"Gene")
setkey(geneimprint,"Gene")

### names from parent come first, followed by the names that only
### occur in geneimprint; duplicates are dropped
imprinted.genes <-
    union(parent[,Gene],geneimprint[,Gene])

### one name per line, no header, no quoting
write.table(file=args[length(args)],
            imprinted.genes,
            sep="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)