From 0f5916166f84fdf5f4265065a57b95d8e8185a4a Mon Sep 17 00:00:00 2001
From: Don Armstrong
Date: Tue, 13 Dec 2011 16:28:46 -0800
Subject: [PATCH] add extra data loader; update intron exon loader and add util to retreive human refseq stuff

---
NOTE(review): this copy of the patch had lost its line breaks and has been
re-split into one record per line. Blank context lines were re-inserted to
match the hunk line counts. Whitespace inside the hunks (leading indentation
and the character between the quotes after DELIMITER) is reproduced as seen
and may differ from the original commit; the -/+ pairs in the
intron_exon_loader.pl hunk are textually identical here, so that change was
presumably whitespace-only. TODO: confirm against commit
0f5916166f84fdf5f4265065a57b95d8e8185a4a before applying.

 utils/intron_exon_loader.pl    |  4 ++--
 utils/load_extra_data.sh       | 25 +++++++++++++++++++++++++
 utils/retreive_human_refseq.sh |  3 ++-
 3 files changed, 29 insertions(+), 3 deletions(-)
 create mode 100644 utils/load_extra_data.sh

diff --git a/utils/intron_exon_loader.pl b/utils/intron_exon_loader.pl
index 99ace9d..fef9dca 100755
--- a/utils/intron_exon_loader.pl
+++ b/utils/intron_exon_loader.pl
@@ -46,11 +46,11 @@ Display this manual.
 
  zcat CHR_*/hs_ref_GRCh37.p2_*.gbs.gz | \
  intron_exon_loader| \
- psql snp -c "COPY mrna FROM STDIN WITH DELIMITER ' ' NULL AS 'NULL'";
+ psql snp -c "COPY mrna FROM STDIN WITH DELIMITER ' ' NULL AS 'NULL'";
 
  zcat CHR_*/hs_ref_GRCh37.p2_*.gbs.gz | \
  intron_exon_loader --output cds| \
- psql snp -c "COPY cds FROM STDIN WITH DELIMITER ' ' NULL AS 'NULL'";
+ psql snp -c "COPY cds FROM STDIN WITH DELIMITER ' ' NULL AS 'NULL'";
 
 =cut
 
diff --git a/utils/load_extra_data.sh b/utils/load_extra_data.sh
new file mode 100644
index 0000000..182ffc0
--- /dev/null
+++ b/utils/load_extra_data.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+DATA_DIR=/srv/ncbi/refseq/
+SCHEMA_DIR=/srv/ncbi/db_snp_utils/schema
+UTIL_DIR=${SCHEMA_DIR}/../utils/
+
+
+psql snp < ${SCHEMA_DIR}/extra_schema/mrna_cds_table.sql
+psql snp < ${SCHEMA_DIR}/extra_schema/intron_exon_schema.sql
+
+zcat ${DATA_DIR}/human.rna.gbff.gz | \
+    ${UTIL_DIR}/human_mrna_cds_insert.pl | \
+    psql snp -c "COPY mrna_cds_table FROM STDIN WITH DELIMITER ' ' NULL AS 'NULL'";
+
+zcat ${DATA_DIR}/seq_contig.md.gz | \
+    ${UTIL_DIR}/import_seq_component_md.pl | \
+    psql snp -c "COPY contigs FROM STDIN WITH DELIMITER ' ' NULL AS 'NULL'";
+
+zcat ${DATA_DIR}/CHR_*/hs_ref_GRCh37.p*_*.gbs.gz | \
+    ${UTIL_DIR}/intron_exon_loader.pl --type mrna| \
+    psql snp -c "COPY mrna FROM STDIN WITH DELIMITER ' ' NULL AS 'NULL'";
+
+zcat ${DATA_DIR}/CHR_*/hs_ref_GRCh37.p*_*.gbs.gz | \
+    ${UTIL_DIR}/intron_exon_loader.pl --type cds| \
+    psql snp -c "COPY cds FROM STDIN WITH DELIMITER ' ' NULL AS 'NULL'";
diff --git a/utils/retreive_human_refseq.sh b/utils/retreive_human_refseq.sh
index 60440b8..a7d6d78 100755
--- a/utils/retreive_human_refseq.sh
+++ b/utils/retreive_human_refseq.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
 
 rsync -rvP -m --include '**GRCh37.p*_*.gbs.gz' --include '**GRCh37.p*_*.fa.gz' --include '**/' --exclude '*' rsync://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/CHR* .
-rsync -rvP rsync://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/mRNA_Prot/human.rna.gbff.gz .
\ No newline at end of file
+rsync -rvP rsync://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/mRNA_Prot/human.rna.gbff.gz .
+rsync -LvP rsync://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/mapview/seq_contig.md.gz .
-- 
2.39.2