]> git.donarmstrong.com Git - bin.git/blob - gene_search
add script to find bts mail from the log
[bin.git] / gene_search
1 #! /usr/bin/perl
# gene_search queries the NCBI Entrez Gene database, and is released
# under the terms of the GPL version 2, or any later version, at your
# option. See the file README and COPYING for more information.
# Copyright 2011 by Don Armstrong <don@donarmstrong.com>.
# $Id: perl_script 1825 2011-01-02 01:53:43Z don $
7
8
9 use warnings;
10 use strict;
11
12 use Getopt::Long;
13 use Pod::Usage;
14
15 use Bio::DB::EUtilities;
16
17 use Encode qw(encode_utf8);
18 use Term::ANSIColor qw(:constants);
19 use Text::Wrap;
20
21
=head1 NAME

gene_search - search the NCBI Entrez Gene database and print matching genes
25
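=head1 SYNOPSIS

 gene_search [options] searchterms

 Options:
  --color, -c     colorize output with ANSI escapes (Default on)
  --org-mode      emit org-mode headings (Default off)
  --symbol, -s    treat each search term as a preferred gene symbol
  --organism, -o  organism used with --symbol (Default 'Homo sapiens')
  --debug, -d     debugging level (Default 0)
  --help, -h      display this help
  --man, -m       display manual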
34
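=head1 OPTIONS

=over

=item B<--color, -c>

Colorize the output with ANSI escape sequences. Enabled by default;
use B<--no-color> to disable it.

=item B<--org-mode>

Prefix each gene with C<* > and each summary with a C<** Summary>
heading, so that the output can be pasted into an org-mode file.
(Default off)

=item B<--symbol, -s>

Treat every search term as a preferred gene symbol. Each term is
restricted to the organism given by B<--organism>, and the terms are
combined with OR. Without this option the search terms are joined with
spaces and passed to Entrez unchanged.

=item B<--organism, -o>

Organism used to restrict the search when B<--symbol> is given.
(Default 'Homo sapiens')

=item B<--debug, -d>

Debug verbosity. (Default 0)

=item B<--help, -h>

Display brief usage information.

=item B<--man, -m>

Display this manual.

=back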
52
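=head1 EXAMPLES

Look up two human genes by their preferred symbols:

 gene_search --symbol TP53 BRCA1

Run a free-text search and produce uncolored org-mode output:

 gene_search --no-color --org-mode tumor protein p53

=cut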
57
58
# Debug verbosity, kept as a package global ($main::DEBUG). Declared
# with `our` instead of the obsolete `use vars` pragma.
our $DEBUG;

# Option defaults; GetOptions() below overwrites these entries in place.
my %options = (debug           => 0,
               help            => 0,
               man             => 0,
               color           => 1,
               org_mode        => 0,
               symbol          => 0,
               organism        => 'Homo sapiens',
               );

# GetOptions() returns false when it meets an unknown or malformed
# option. Stop with the usage message and exit status 2 in that case
# instead of carrying on with a partially parsed command line.
GetOptions(\%options,
           'color|c!',
           'org_mode|org-mode!',
           'symbol|s!',
           'organism|o=s',
           'debug|d+','help|h|?','man|m')
    or pod2usage(2);

pod2usage() if $options{help};
pod2usage({verbose=>2}) if $options{man};

$DEBUG = $options{debug};
81
# Gather every usage problem first so that they are reported together.
my @USAGE_ERRORS;
push @USAGE_ERRORS, "You must pass something" unless @ARGV;

if (@USAGE_ERRORS) {
    pod2usage(join("\n", @USAGE_ERRORS));
}

# Build the Entrez query. In symbol mode each argument becomes its own
# "(symbol AND organism)" clause and the clauses are ORed together;
# otherwise the arguments are passed through as one space-separated query.
my $term;
if ($options{symbol}) {
    my @clauses;
    for my $symbol (@ARGV) {
        push @clauses,
            sprintf('(%s[Preferred Symbol] AND %s[Orgn])',
                    $symbol, $options{organism});
    }
    $term = join(' OR ', @clauses);
}
else {
    $term = join(' ', @ARGV);
}
93
# Parameters shared by both E-utilities requests.
my %entrez = (-email => 'don@donarmstrong.com',
              -db    => 'gene',
             );

# Step 1: esearch resolves the query to a list of gene ids
# (-retmax is set to 1000).
my $esearch = Bio::DB::EUtilities->new(%entrez,
                                       -eutil  => 'esearch',
                                       -term   => $term,
                                       -retmax => 1000,
                                      );
my @gene_ids = $esearch->get_ids();
# print scalar(@gene_ids)." results:\n";

# No hits: finish quietly with a success status.
@gene_ids or exit 0;

# Step 2: efetch retrieves the records for those ids as XML.
my $efetch = Bio::DB::EUtilities->new(%entrez,
                                      -eutil   => 'efetch',
                                      -id      => \@gene_ids,
                                      -retmode => 'xml',
                                     );
#print $efetch->get_Response()->content() if $DEBUG;
use XML::LibXML;
my $response_body = $efetch->get_Response()->content();
my $xml = XML::LibXML->load_xml(string => $response_body);
# Return the UTF-8 encoded text of an XML node, or the empty string when
# the node was not found. findnodes() may match nothing, which leaves the
# receiving variable undefined; calling textContent() on it would die.
my $node_text = sub {
    my ($node) = @_;
    return defined $node ? encode_utf8($node->textContent()) : '';
};

# Print one entry per gene record: symbol, map location (plus the first
# sequence interval when one is present), description and, when the
# record has one, its re-wrapped summary.
for my $gene ($xml->findnodes('Entrezgene-Set/Entrezgene')) {
    print $gene->toString if $DEBUG;

    # findnodes() returns all matches in list context; only the first
    # match of each element is kept.
    my ($locus) = $gene->findnodes('.//Gene-ref_locus');
    my ($desc) = $gene->findnodes('.//Gene-ref_desc');
    my ($summary) = $gene->findnodes('.//Entrezgene_summary');
    my ($idiogram) = $gene->findnodes('.//Gene-ref_maploc');

    # Records without a map location are not reported at all.
    next unless defined $idiogram;

    my ($start) = $gene->findnodes('.//Gene-commentary_seqs/Seq-loc/Seq-loc_int/Seq-interval/Seq-interval_from');
    my ($stop) = $gene->findnodes('.//Gene-commentary_seqs/Seq-loc/Seq-loc_int/Seq-interval/Seq-interval_to');

    # The interval is appended only when both ends were found.
    my $location = $node_text->($idiogram);
    if (defined $start and defined $stop) {
        $location .= ' '.$node_text->($start).':'.$node_text->($stop);
    }
    my $symbol = $node_text->($locus);
    my $description = $node_text->($desc);

    if ($options{org_mode}) {
        print "* ";
    }
    print BOLD GREEN if $options{color};
    print $symbol.": ";
    print RESET if $options{color};
    print "(".$location.") ";
    print BOLD CYAN if $options{color};
    print $description."\n";
    print RESET if $options{color};

    # The summary colour is switched on only when a summary is actually
    # printed, so every colour escape is paired with a RESET and nothing
    # leaks into the next record or the terminal.
    next unless defined $summary;

    print BOLD MAGENTA if $options{color};
    if ($options{org_mode}) {
        print "** Summary\n";
    }
    my $summary_text = $summary->textContent();
    # Strip leading whitespace from every line before re-wrapping.
    $summary_text =~ s/^\s*//mg;
    print wrap('','',encode_utf8($summary_text));
    print "\n\n";
    print RESET if $options{color};
}
147
148 __END__