]> git.donarmstrong.com Git - imprinted_genes.git/blobdiff - combine_imprinted_genes.R
add rules to build combined imprinted genes file
[imprinted_genes.git] / combine_imprinted_genes.R
diff --git a/combine_imprinted_genes.R b/combine_imprinted_genes.R
new file mode 100644 (file)
index 0000000..782da8e
--- /dev/null
@@ -0,0 +1,24 @@
+### Build one list of imprinted gene symbols from two input tables.
+### usage: combine_imprinted_genes.R GENEIMPRINT PARENT [...] OUTPUT
+library(data.table)
+
+args <- commandArgs(trailingOnly=TRUE)
+### output is the last argument; with < 3 args it would overwrite an input
+if (length(args) < 3) stop("usage: GENEIMPRINT PARENT OUTPUT", call.=FALSE)
+geneimprint <- fread(args[1])
+parent <- fread(args[2])
+### fix up the 0 prefixed chromosomes (chr is not part of the output)
+parent[,chr:=gsub("^0","",chromosome)]
+### remove aliases in ()
+parent[,Gene:=gsub("\\s*\\([^\\)]+\\)\\s*","",gene)]
+### remove aliases after ,
+parent[,Gene:=gsub("\\s*,\\s*.+","",Gene)]
+### keep only symbols made up solely of uppercase letters and digits
+parent <- parent[grepl("^[A-Z0-9]+$",Gene),]
+setkey(parent,"Gene")
+setkey(geneimprint,"Gene")
+
+### one symbol per line: parent genes first, then those only in geneimprint
+imprinted.genes <- union(parent[,Gene],geneimprint[,Gene])
+write.table(file=args[length(args)],imprinted.genes,
+            sep="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)