3 # Copyright (C) 2007 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 # Stuff for interacting with NCBI Entrez
28 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
36 use vars qw( @ISA @EXPORT );
38 @ISA = qw( Exporter );
41 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
46 # Martin A. Hansen, March 2007.
48 # connects to the ncbi website and retrieves a genbank record,
51 my ( $db, # database <nucleotide|protein>
53 $type, # retrieval type <gb|gp>
58 my ( $content, @lines, $i, $seq );
60 $content = get "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=$db&id=$id&rettype=$type";
68 # Martin A. Hansen, March 2007.
70 # connects to the ncbi website and retrieves a genbank record,
71 # from which the sequence is parsed and returned.
73 my ( $db, # database <nucleotide|protein>
75 $type, # retrieval type <gb|gp>
80 my ( $content, @lines, $i, $seq );
82 $content = get "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=$db&id=$id&rettype=$type";
84 @lines = split "\n", $content;
88 while ( $lines[ $i ] !~ /^ORIGIN/ ) {
94 while ( $lines[ $i ] !~ /^\/\// )
96 $lines[ $i ] =~ s/^\s*\d+//;
111 # Martin A. Hansen, February 2008.
113 # !!! NOT USED ANYMORE !!! #
115 # Reads in and parses a file in SOFT format.
117 my ( $path, # full path to SOFT file
122 my ( $fh, @lines, $i, $c, $num, %key_hash, @fields, %id_hash, $id, $seq, $count, $record, @records, $platform_id, $sample_id, $sample_title );
124 $fh = &Maasha::Common::read_open( $path );
136 while ( $i < @lines )
138 if ( $lines[ $i ] =~ /^\^PLATFORM = (.+)/ )
142 elsif ( $lines[ $i ] =~ /^!platform_table_begin$/ )
144 @fields = split "\t", $lines[ $i + 1 ];
146 for ( $c = 0; $c < @fields; $c++ ) {
147 $key_hash{ $fields[ $c ] } = $c;
152 while ( $lines[ $c ] !~ /^!platform_table_end$/ )
154 @fields = split "\t", $lines[ $c ];
156 $id_hash{ $fields[ $key_hash{ "ID" } ] } = $fields[ $key_hash{ "SEQUENCE" } ];
163 elsif ( $lines[ $i ] =~ /^\^SAMPLE = (.+)/ )
167 elsif ( $lines[ $i ] =~ /^!Sample_title = (.+)/ )
171 elsif ( $lines[ $i ] =~ /^!sample_table_begin/ )
175 @fields = split "\t", $lines[ $i + 1 ];
177 for ( $c = 0; $c < @fields; $c++ ) {
178 $key_hash{ $fields[ $c ] } = $c;
183 while ( $lines[ $c ] !~ /^!sample_table_end$/ )
187 @fields = split "\t", $lines[ $c ];
189 $id = $fields[ $key_hash{ "ID_REF" } ];
190 $seq = $id_hash{ $id };
191 $count = $fields[ $key_hash{ "VALUE" } ];
195 $record->{ "SAMPLE_TITLE" } = $sample_title;
196 $record->{ "SEQ" } = $seq;
197 $record->{ "SEQ_NAME" } = join( "_", $platform_id, $sample_id, $num, $count );
199 push @records, $record;
213 return wantarray ? @records : \@records;
219 # Martin A. Hansen, June 2008.
221 # Create an index with the line numbers of the different tables
222 # in a SOFT file. The index is returned.
224 my ( $file, # file to index
229 my ( $fh, $line, $i, $c, @index, $first );
231 $fh = &Maasha::Common::read_open( $file );
238 while ( $line = <$fh> )
242 if ( $line =~ /^\^/ )
244 push @index, [ $line, $i ];
248 push @{ $index[ $c - 1 ] }, $i - 1;
261 push @{ $index[ $c - 1 ] }, $i - 1;
265 return wantarray ? @index : \@index;
269 sub soft_get_platform
271 # Martin A. Hansen, June 2008.
273 # Given a filehandle to a SOFT file, parses the platform table
276 my ( $fh, # filehandle
277 $beg, # line number where platform tables begin
278 $end, # line number where platform tables end
283 my ( $line, @lines, $i, $c, @fields, %key_hash, %id_hash );
287 while ( $line = <$fh> )
291 push @lines, $line if $i >= $beg;
300 while ( $i < @lines )
302 if ( $lines[ $i ] =~ /^!platform_table_begin$/ )
304 @fields = split "\t", $lines[ $i + 1 ];
306 for ( $c = 0; $c < @fields; $c++ ) {
307 $key_hash{ $fields[ $c ] } = $c;
312 while ( $lines[ $c ] !~ /^!platform_table_end$/ )
314 @fields = split "\t", $lines[ $c ];
316 $id_hash{ $fields[ $key_hash{ "ID" } ] } = $fields[ $key_hash{ "SEQUENCE" } ];
327 return wantarray ? %id_hash : \%id_hash;
333 # Martin A. Hansen, June 2008.
335 # Given a filehandle to a SOFT file, parses the sample table
338 my ( $fh, # filehandle
339 $plat_table, # hashref with platform tables
340 $beg, # line number where sample table begin
341 $end, # line number where sample table end
346 my ( $line, @lines, $i, $c, $platform_id, @fields, %key_hash, $num, $sample_id, $sample_title, $id, $seq, $count, @records, $record );
350 while ( $line = <$fh> )
354 push @lines, $line if $i >= $beg;
365 while ( $i < @lines )
367 if ( $lines[ $i ] =~ /^\^SAMPLE = (.+)/ )
371 elsif ( $lines[ $i ] =~ /!Sample_platform_id = (.+)/ )
375 elsif ( $lines[ $i ] =~ /^!Sample_title = (.+)/ )
379 elsif ( $lines[ $i ] =~ /^!sample_table_begin/ )
383 @fields = split "\t", $lines[ $i + 1 ];
385 for ( $c = 0; $c < @fields; $c++ ) {
386 $key_hash{ $fields[ $c ] } = $c;
391 while ( $lines[ $c ] !~ /^!sample_table_end$/ )
395 @fields = split "\t", $lines[ $c ];
397 $id = $fields[ $key_hash{ "ID_REF" } ];
398 $seq = $plat_table->{ $id };
399 $count = $fields[ $key_hash{ "VALUE" } ];
403 $record->{ "SAMPLE_TITLE" } = $sample_title;
404 $record->{ "SEQ" } = $seq;
405 $record->{ "SEQ_NAME" } = join( "_", $platform_id, $sample_id, $num, $count );
407 push @records, $record;
421 return wantarray ? @records : \@records;
425 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<