2 # load_affymetrix_probe_annotations.pl loads affymetrix probe annotations
3 # and is released under the terms of the GNU GPL version 3, or any
4 # later version, at your option. See the file README and COPYING for
6 # Copyright 2013 by Don Armstrong <don@donarmstrong.com>.
17 load_affymetrix_probe_annotations.pl - loads affymetrix probe annotations
21 load_affymetrix_probe_annotations.pl [options] [annotation files]
24 --service, -s pgsql service
25 --progress, -p show progress bar
26 --debug, -d debugging level (Default 0)
27 --help, -h display this help
28 --man, -m display manual
38 =item B<--progress,-p>
44 Debug verbosity. (Default 0)
48 Display brief usage information.
58 load_affymetrix_probe_annotations.pl
65 use Term::ProgressBar;
70 my %options = (debug => 0,
80 'debug|d+','help|h|?','man|m');
82 pod2usage() if $options{help};
83 pod2usage({verbose=>2}) if $options{man};
85 $DEBUG = $options{debug};
89 # push @USAGE_ERRORS,"You must pass something";
92 pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
94 my $dbh = DBI->connect("dbi:Pg:service=$options{service}",
95 '','',{AutoCommit => 0}) or
96 die "Unable to connect to database: ".$DBI::errstr;
99 $sth{insert_annotation} = $dbh->prepare(<<'END') // die "Unable to prepare insert annotation statement: ".$dbh->errstr;
100 INSERT INTO affy_annotation
103 array_name,entrez_id,
104 refseq_prot,refseq_transcript) VALUES ($1,$2,$3,$4,$5,$6,$7,$8);
107 $sth{delete_annotation_id} = $dbh->prepare(<<'END') // die "Unable to prepare delete annotation id statement: ".$dbh->errstr;
108 DELETE FROM affy_annotation aa WHERE aa.probe = $1;
111 $sth{select_annotation_id} = $dbh->prepare(<<'END') // die "Unable to prepare select annotation id statement: ".$dbh->errstr;
112 SELECT aa.id FROM affy_annotation aa WHERE aa.probe = $1;
116 $sth{select_affy_probe_id} = $dbh->prepare(<<'END') // die "Unable to prepare select annotation id statement: ".$dbh->errstr;
117 SELECT id FROM affy_probe ap WHERE ap.probe = $1;
120 $sth{insert_affy_probe_id} = $dbh->prepare(<<'END') // die "Unable to prepare insert affy probe id statement: ".$dbh->errstr;
121 INSERT INTO affy_probe (probe) VALUES ($1);
127 for my $ifn (@ARGV) {
128 my $ifh = IO::File->new($ifn,'r') or
129 die "Unable to open $ifn for reading: $!";
138 (probe => qr/(?i)Probe\s*Set\s*ID/,
139 gene_symbol => qr/(?i)Gene\s*Symbol/,
140 gene_name => qr/(?i)Gene\s*Title/,
141 species => qr/(?i)Species\s*Scientific\s*Name/,
142 array_name => qr/(?i)GeneChip\s*Array/,
143 entrez_id => qr/(?i)Entrez\s*Gene/,
144 refseq_prot => qr/(?i)RefSeq\s*Protein\s*ID/,
145 refseq_transcript => qr/(?i)RefSeq\s*Transcript\s*ID/,
150 if ($options{progress}) {
151 if ($ifh->seek(0,SEEK_END)) {
152 $p = Term::ProgressBar->new({count => $ifh->tell,
155 $ifh->seek(0,SEEK_SET);
159 my $csv = Text::CSV->new({sep_char=>','});
161 my %important_headers;
165 if (not $csv->parse($_)) {
166 die "Unable to parse line $. of file: ".$csv->error_diag();
168 my @row = $csv->fields();
171 @headers{@header} = 0..$#row;
172 for my $header (keys %header_regex) {
174 grep { $_ =~ $header_regex{$header}
176 $important_headers{$header} = $headers{$match[0]} if @match;
179 p %important_headers;
182 die "unable to find header match for $header";
187 insert_annotation($dbh,\%sth,
188 {fixup_row(\%important_headers,\@row)
192 $p->update($ifh->tell);
198 sub insert_annotation {
199 my ($dbh,$sth,$annot) = @_;
201 $annot->{probe_id} = select_affy_probe_id(@_);
202 # see if this annotation already exists
203 return unless defined $annot->{probe_id};
204 my $annot_id = select_annotation_id(@_);
206 if (not defined $annot_id) {
207 my $rv = $sth->{insert_annotation}->execute(@{$annot}{(qw(probe_id gene_symbol gene_name species array_name entrez_id),
208 qw(refseq_prot refseq_transcript),
210 // die "Unable to execute statement properly: ".$dbh->errstr;
211 $sth->{insert_annotation}->finish;
213 print STDERR "probe: $annot->{probe} is already annotated ($annot_id)\n" if $DEBUG;
217 sub select_annotation_id {
218 my ($dbh,$sth,$annot) = @_;
219 if (not defined $annot->{probe_id}) {
220 $annot->{probe_id} = select_affy_probe_id(@_);
221 return unless defined $annot->{probe_id};
223 my $rv = $sth->{select_annotation_id}->execute($annot->{probe_id}) //
224 die "Unable to execute statement properly: ".$dbh->errstr;
225 my ($sample_id) = map {ref $_ ?@{$_}:()}
226 map {ref $_ ?@{$_}:()} $sth->{select_annotation_id}->fetchall_arrayref([0]);
227 $sth->{select_annotation_id}->finish;
231 sub select_affy_probe_id {
232 my ($dbh,$sth,$annot) = @_;
233 my $rv = $sth->{select_affy_probe_id}->execute($annot->{probe}) //
234 die "Unable to execute statement properly: ".$dbh->errstr;
235 my ($probe_id) = map {ref $_ ?@{$_}:()}
236 map {ref $_ ?@{$_}:()} $sth->{select_affy_probe_id}->fetchall_arrayref([0]);
237 $sth->{select_affy_probe_id}->finish;
244 for my $h (keys %{$ih}) {
245 $r{$h} = $r->[$ih->{$h}];