#!/usr/bin/make -f
+# Interpreter and command-line flags used to run the R scripts below.
+# Both can be overridden on the make command line (e.g. make R=/opt/R/bin/R).
+R := R
+ROPTS := -q --no-save --no-restore-data
+
# Download the geneimprint.com gene listing for Homo sapiens.
# wget creates its -O file even when the transfer fails, so fetch into a
# temporary name and rename only on success; otherwise a failed download
# would leave an empty or partial target that make treats as up to date.
geneimprint_human.html:
	wget -O $@.tmp "http://www.geneimprint.com/site/genes-by-species.Homo+sapiens"
	mv -f $@.tmp $@
# Run the parser script over the saved parent-of-origin HTML page and
# capture its standard output as parent_of_origin.txt.
# The shell creates the redirect target before the parser starts, so write
# to a temporary file and rename it only when the parser exits successfully;
# a failed run then cannot leave a truncated target that looks up to date.
parent_of_origin.txt: parent_of_origin.html parse_parent_of_origin.pl
	./parse_parent_of_origin.pl $< > $@.tmp
	mv -f $@.tmp $@
+
+# Build the combined gene list by running the R script named as the first
+# prerequisite. Every remaining prerequisite is passed to the script after
+# --args, in order, followed by the target as the final argument.
+# The recipe line must begin with a hard TAB (after the patch marker).
+combined_imprinted_genes.txt: combine_imprinted_genes.R geneimprint_human.txt parent_of_origin.txt
+	$(R) $(ROPTS) -f $< --args $(wordlist 2,$(words $^),$^) $@
--- /dev/null
+### Combine the gene names found in two input tables into one
+### de-duplicated list, written one name per line.
+###
+### Arguments (after --args):
+###   1    table with a "Gene" column
+###   2    table with "gene" and "chromosome" columns
+###   last path of the output file
+library(data.table)
+
+args <- commandArgs(trailingOnly=TRUE)
+### the output path is the last argument; with fewer than three arguments
+### it would coincide with an input file and overwrite it
+if (length(args) < 3) {
+    stop("expected at least 3 arguments: <geneimprint table> ",
+         "<parent of origin table> <output file>")
+}
+
+geneimprint <- fread(args[1])
+parent <- fread(args[2])
+### fix up the 0 prefixed chromosomes
+### (the result is stored in a new chr column; it is not read again below)
+parent[,chr:=gsub("^0","",chromosome)]
+### remove aliases in ()
+parent[,Gene:=gsub("\\s*\\([^\\)]+\\)\\s*","",gene)]
+### remove aliases after ,
+parent[,Gene:=gsub("\\s*,\\s*.+","",Gene)]
+
+### keep only rows whose cleaned name consists solely of upper-case
+### letters and digits
+parent <- parent[grepl("^[A-Z0-9]+$",Gene),]
+### setkey sorts each table by Gene, which determines the order in which
+### names appear in the union below
+setkey(parent,"Gene")
+setkey(geneimprint,"Gene")
+
+### names from the second table first, then any additional names from the
+### first table
+imprinted.genes <-
+    union(parent[,Gene],geneimprint[,Gene])
+
+### plain list: no header, no row names, no quoting
+write.table(file=args[length(args)],
+            imprinted.genes,
+            sep="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
+