2 # load_affymetrix_probe_annotations.pl loads affymetrix probe annotations
3 # and is released under the terms of the GNU GPL version 3, or any
4 # later version, at your option. See the file README and COPYING for
6 # Copyright 2013 by Don Armstrong <don@donarmstrong.com>.
17 load_affymetrix_probe_annotations.pl - loads affymetrix probe annotations
21 load_affymetrix_probe_annotations.pl [options] [annotation files]
24 --service, -s pgsql service
25 --debug, -d debugging level (Default 0)
26 --help, -h display this help
27 --man, -m display manual
37 =item B<--progress,-p>
41 Debug verbosity. (Default 0)
45 Display brief usage information.
55 load_affymetrix_probe_annotations.pl
62 use Term::ProgressBar;
67 my %options = (debug => 0,
77 'debug|d+','help|h|?','man|m');
79 pod2usage() if $options{help};
80 pod2usage({verbose=>2}) if $options{man};
82 $DEBUG = $options{debug};
86 # push @USAGE_ERRORS,"You must pass something";
89 pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
91 my $dbh = DBI->connect("dbi:Pg:service=$options{service}",
92 '','',{AutoCommit => 0}) or
93 die "Unable to connect to database: ".$DBI::errstr;
96 $sth{insert_annotation} = $dbh->prepare(<<'END') // die "Unable to prepare insert annotation statement: ".$dbh->errstr;
97 INSERT INTO affy_annotation
100 array_name,entrez_id,
101 refseq_prot,refseq_transcript) VALUES ($1,$2,$3,$4,$5,$6,$7,$8);
104 $sth{delete_annotation_id} = $dbh->prepare(<<'END') // die "Unable to prepare delete annotation id statement: ".$dbh->errstr;
105 DELETE FROM affy_annotation aa WHERE aa.probe = $1;
108 $sth{select_annotation_id} = $dbh->prepare(<<'END') // die "Unable to prepare select annotation id statement: ".$dbh->errstr;
109 SELECT aa.id FROM affy_annotation aa WHERE aa.probe = $1;
113 $sth{select_affy_probe_id} = $dbh->prepare(<<'END') // die "Unable to prepare select annotation id statement: ".$dbh->errstr;
114 SELECT id FROM affy_probe ap WHERE ap.probe = $1;
117 $sth{insert_affy_probe_id} = $dbh->prepare(<<'END') // die "Unable to prepare insert affy probe id statement: ".$dbh->errstr;
118 INSERT INTO affy_probe (probe) VALUES ($1);
124 for my $ifn (@ARGV) {
125 my $ifh = IO::File->new($ifn,'r') or
126 die "Unable to open $ifn for reading: $!";
135 (probe => qr/(?i)Probe\s*Set\s*ID/,
136 gene_symbol => qr/(?i)Gene\s*Symbol/,
137 gene_name => qr/(?i)Gene\s*Title/,
138 species => qr/(?i)Species\s*Scientific\s*Name/,
139 array_name => qr/(?i)GeneChip\s*Array/,
140 entrez_id => qr/(?i)Entrez\s*Gene/,
141 refseq_prot => qr/(?i)RefSeq\s*Protein\s*ID/,
142 refseq_transcript => qr/(?i)RefSeq\s*Transcript\s*ID/,
147 if ($options{progress}) {
148 if ($ifh->seek(0,SEEK_END)) {
149 $p = Term::ProgressBar->new({count => $ifh->tell,
152 $ifh->seek(0,SEEK_SET);
156 my $csv = Text::CSV->new({sep_char=>','});
158 my %important_headers;
162 if (not $csv->parse($_)) {
163 die "Unable to parse line $. of file: ".$csv->error_diag();
165 my @row = $csv->fields();
168 @headers{@header} = 0..$#row;
169 for my $header (keys %header_regex) {
171 grep { $_ =~ $header_regex{$header}
173 $important_headers{$header} = $headers{$match[0]} if @match;
176 p %important_headers;
179 die "unable to find header match for $header";
184 insert_annotation($dbh,\%sth,
185 {fixup_row(\%important_headers,\@row)
189 $p->update($ifh->tell);
195 sub insert_annotation {
196 my ($dbh,$sth,$annot) = @_;
198 $annot->{probe_id} = select_affy_probe_id(@_);
199 # see if this annotation already exists
200 return unless defined $annot->{probe_id};
201 my $annot_id = select_annotation_id(@_);
203 if (not defined $annot_id) {
204 my $rv = $sth->{insert_annotation}->execute(@{$annot}{(qw(probe_id gene_symbol gene_name species array_name entrez_id),
205 qw(refseq_prot refseq_transcript),
207 // die "Unable to execute statement properly: ".$dbh->errstr;
208 $sth->{insert_annotation}->finish;
210 print STDERR "probe: $annot->{probe} is already annotated ($annot_id)\n" if $DEBUG;
214 sub select_annotation_id {
215 my ($dbh,$sth,$annot) = @_;
216 if (not defined $annot->{probe_id}) {
217 $annot->{probe_id} = select_affy_probe_id(@_);
218 return unless defined $annot->{probe_id};
220 my $rv = $sth->{select_annotation_id}->execute($annot->{probe_id}) //
221 die "Unable to execute statement properly: ".$dbh->errstr;
222 my ($sample_id) = map {ref $_ ?@{$_}:()}
223 map {ref $_ ?@{$_}:()} $sth->{select_annotation_id}->fetchall_arrayref([0]);
224 $sth->{select_annotation_id}->finish;
228 sub select_affy_probe_id {
229 my ($dbh,$sth,$annot) = @_;
230 my $rv = $sth->{select_affy_probe_id}->execute($annot->{probe}) //
231 die "Unable to execute statement properly: ".$dbh->errstr;
232 my ($probe_id) = map {ref $_ ?@{$_}:()}
233 map {ref $_ ?@{$_}:()} $sth->{select_affy_probe_id}->fetchall_arrayref([0]);
234 $sth->{select_affy_probe_id}->finish;
241 for my $h (keys %{$ih}) {
242 $r{$h} = $r->[$ih->{$h}];