#! /usr/bin/perl # snp_study_table outputs a table of study results, and is released # under the terms of the GPL version 2, or any later version, at your # option. See the file README and COPYING for more information. # Copyright 2011 by Don Armstrong . # $Id: perl_script 1825 2011-01-02 01:53:43Z don $ use warnings; use strict; use Getopt::Long; use Pod::Usage; =head1 NAME snp_study_table - output an xls table of study results =head1 SYNOPSIS snp_study_table [options] [snps] Options: --output output filename (Default STDOUT) --debug, -d debugging level (Default 0) --help, -h display this help --man, -m display manual =head1 SNPS SNPS can be specified as rs12345 or as chr-pos; in the later case, the ga_chr_pos table will be used, in the former, the ga_snp table will be used. =head1 OPTIONS =over =item B<--output> Output filename (defaults to STDOUT) =item B<--debug, -d> Debug verbosity. (Default 0) =item B<--help, -h> Display brief usage information. =item B<--man, -m> Display this manual. =back =head1 EXAMPLES =cut use vars qw($DEBUG); use DBI; use Spreadsheet::WriteExcel; my %options = (debug => 0, help => 0, man => 0, ); GetOptions(\%options, 'output|o=s', 'debug|d+','help|h|?','man|m'); pod2usage() if $options{help}; pod2usage({verbose=>2}) if $options{man}; $DEBUG = $options{debug}; my @USAGE_ERRORS; pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS; my $out_fh = \*STDOUT; if (exists $options{output}) { $out_fh = IO::File->new($options{output},'w') or die "Unable to open $options{output} for writing: $!"; } my $wb = Spreadsheet::WriteExcel->new($out_fh); my $ws = $wb->add_worksheet('snp_results'); my $dbh = DBI->connect("dbi:Pg:service=snp",'','',{AutoCommit => 0}) or die "Unable to connect to database: ".$DBI::errstr; my $sth_ga_info = $dbh->prepare(<<'END') // die "Unable to prepare ga_snp info statement: ".$dbh->errstr; SELECT * FROM ga_snp WHERE snp_id=?; END my $sth_ga_cp_info = $dbh->prepare(<<'END') // die "Unable to prepare ga_snp info statement: ".$dbh->errstr; SELECT * FROM ga_chr_pos WHERE chr=? AND pos=?; END if (not @ARGV) { while () { next if /^#/; chomp; push @ARGV,$_; } } my %studies; my %snps; for my $snp (@ARGV) { my $results; if ($snp =~ /^(?:rs)(\d+)$/) { $results = get_snp_results(dbh => $dbh, sth => $sth_ga_info, bind => [$1], ); } elsif ($snp =~ /^(\d+)-(\d+)$/) { $results = get_snp_results(dbh => $dbh, sth => $sth_ga_cp_info, bind => [$1,$2], ); } else { print STDERR "Invalid snp format '$snp'\n"; next; } $snps{$snp} = $results; for my $result (@{$results}) { $studies{$result->{study_name}}{$result->{subpart_name}} = 1; } } $dbh->disconnect(); #column names my @cn = ('A'..'Z','AA'..'ZZ'); my $r = 1; my $c = 0; my $f_center = $wb->add_format(align => 'center'); $ws->write($cn[$c++].($r+2),'SNP'); for my $study (sort keys %studies) { my $n_substudy = keys %{$studies{$study}}; $ws->write($cn[$c].$r,$study); $ws->merge_range($r-1,$c,$r-1,$c+$n_substudy*3-1,$study,$f_center); for my $substudy (sort keys %{$studies{$study}}) { $ws->write($cn[$c].($r+1),$substudy); $ws->merge_range($r,$c,$r,$c+2,$substudy,$f_center); $ws->write($cn[$c].($r+2),'P value'); $ws->write($cn[$c+1].($r+2),'Q value'); $ws->write($cn[$c+2].($r+2),'FDR'); $c+=3; } } $r+=3; $c=0; for my $snp (sort keys %snps) { $ws->write($cn[$c++].$r,$snp); my %snp_studies; for my $row (@{$snps{$snp}}) { $snp_studies{$row->{study_name}}{$row->{subpart_name}} = [@{$row}{qw(pvalue qvalue fdr)}]; } for my $study (sort keys %studies) { for my $substudy (sort keys %{$studies{$study}}) { if (not exists $snp_studies{$study} or not exists $snp_studies{$study}{$substudy} ) { $c+=3; } else { for (0..2) { $ws->write($cn[$c++].$r,defined $snp_studies{$study}{$substudy}[$_]?$snp_studies{$study}{$substudy}[$_]:''); } } } } $r++; $c=0; } sub get_snp_results{ my %param = @_; my $rv = $param{sth}->execute(@{$param{bind}}) or die "Unable to execute statement properly: ".$param{dbh}->errstr; my $results = $param{sth}->fetchall_arrayref({}); if ($param{sth}->err) { print STDERR $param{sth}->errstr; return {}; } return $results; } __END__