From: Don Armstrong
Date: Fri, 8 May 2015 17:58:17 +0000 (-0700)
Subject: add rules to build combined imprinted genes file
X-Git-Url: https://git.donarmstrong.com/?p=imprinted_genes.git;a=commitdiff_plain;h=8f5f94cc8b0e750124693ffe3490175b9a049ff0

add rules to build combined imprinted genes file
---

diff --git a/Makefile b/Makefile
index cb3d035..b87aa21 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,8 @@
 #!/usr/bin/make -f
 
+R=R
+ROPTS=-q --no-save --no-restore-data
+
 geneimprint_human.html:
 	wget -O $@ "http://www.geneimprint.com/site/genes-by-species.Homo+sapiens"
 
@@ -12,3 +15,6 @@ geneimprint_human.txt: geneimprint_human.html parse_geneimprint.pl
 
 parent_of_origin.txt: parent_of_origin.html parse_parent_of_origin.pl
 	./parse_parent_of_origin.pl $< > $@
+
+combined_imprinted_genes.txt: combine_imprinted_genes.R geneimprint_human.txt parent_of_origin.txt
+	$(R) $(ROPTS) -f $< --args $(wordlist 2,$(words $^),$^) $@
diff --git a/combine_imprinted_genes.R b/combine_imprinted_genes.R
new file mode 100644
index 0000000..782da8e
--- /dev/null
+++ b/combine_imprinted_genes.R
@@ -0,0 +1,24 @@
+library(data.table)
+
+args <- commandArgs(trailingOnly=TRUE)
+
+geneimprint <- fread(args[1])
+parent <- fread(args[2])
+### fix up the 0 prefixed chromosomes
+parent[,chr:=gsub("^0","",chromosome)]
+### remove aliases in ()
+parent[,Gene:=gsub("\\s*\\([^\\)]+\\)\\s*","",gene)]
+### remove aliases after ,
+parent[,Gene:=gsub("\\s*,\\s*.+","",Gene)]
+
+parent <- parent[grepl("^[A-Z0-9]+$",Gene),]
+setkey(parent,"Gene")
+setkey(geneimprint,"Gene")
+
+imprinted.genes <-
+    union(parent[,Gene],geneimprint[,Gene])
+
+write.table(file=args[length(args)],
+            imprinted.genes,
+            sep="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
+