1 package Maasha::UCSC::Wiggle;
3 # Copyright (C) 2007-2010 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 # Routines for manipulation of wiggle data.
27 # http://genome.ucsc.edu/goldenPath/help/wiggle.html
30 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
42 use vars qw( @ISA @EXPORT_OK );
46 @ISA = qw( Exporter );
53 BITS => 32, # Number of bits in an integer
54 SEQ_MAX => 200_000_000, # Maximum sequence size
55 chrom => 0, # BED field names
70 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> FixedStep format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
73 sub fixedstep_entry_get
75 # Martin A. Hansen, December 2007.
77 # Given a file handle to a PhastCons file get the
78 # next entry which is all the lines after a "fixedStep"
79 # line and until the next "fixedStep" line or EOF.
81 my ( $fh, # filehandle
84 # Returns a list of lines
86 my ( $entry, @lines );
88 local $/ = "\nfixedStep ";
96 @lines = split "\n", $entry;
98 return if @lines == 0;
100 $lines[ 0 ] =~ s/fixedStep?\s*//;
101 $lines[ 0 ] = 'fixedStep ' . $lines[ 0 ];
103 return wantarray ? @lines : \@lines;
107 sub fixedstep_entry_put
109 # Martin A. Hansen, April 2008.
111 # Outputs a block of fixedStep values.
112 # Used for outputting wiggle data.
114 my ( $entry, # fixedStep entry
115 $fh, # filehandle - OPTIONAL
122 print $fh join( "\n", @{ $entry } ), "\n";
126 sub fixedstep2biopiece
128 # Martin A. Hansen, November 2008.
130 # Converts a fixedstep entry to a Biopiece record.
132 my ( $entry, # fixedstep entry arrayref
137 my ( $head, %record );
139 $head = shift @{ $entry };
141 if ( $head =~ /^fixedStep chrom=([^ ]+) start=(\d+) step=(\d+)$/ )
143 $record{ "REC_TYPE" } = "fixed_step";
144 $record{ "CHR" } = $1;
145 $record{ "CHR_BEG" } = $2 - 1; # fixedStep is 1-based
146 $record{ "STEP" } = $3;
147 $record{ "VALS" } = join ";", @{ $entry };
151 Maasha::Common::error( qq(could not convert fixedStep entry to Biopiece record) );
154 return wantarray ? %record : \%record;
158 sub biopiece2fixedstep
160 # Martin A. Hansen, November 2008.
162 # Converts a Biopiece record to a fixedStep entry.
164 my ( $record, # Biopiece record
165 $opt_key, # Key from option hash
166 $opt_step, # Step from option hash
171 my ( @entry, $chr, $beg, $step, $vals );
173 $chr = $record->{ 'CHR' } || $record->{ 'S_ID' };
174 $beg = $record->{ 'CHR_BEG' } || $record->{ 'S_BEG' };
175 $step = $record->{ 'STEP' } || $opt_step;
176 $vals = $record->{ 'VALS' };
177 $vals = $record->{ $opt_key } if $opt_key;
179 if ( defined $chr and defined $beg and defined $step and defined $vals)
181 $beg += 1; # fixedStep is 1-based
183 push @entry, "fixedStep chrom=$chr start=$beg step=$step";
185 map { push @entry, $_ } split ";", $vals;
191 return wantarray ? @entry : \@entry;
198 # Martin A. Hansen, November 2008.
200 # Calculates fixedstep entries for a given BED file.
201 # Implemented using large Perl arrays to avoid sorting.
202 # Returns path to the fixedstep file.
204 my ( $bed_file, # path to BED file
205 $chr, # chromosome name
206 $log10, # flag indicating that the score should be in log10 - OPTIONAL
211 my ( $fixedstep_file, $fh_in, $fh_out, $array, $beg, $end, @block, $i );
213 $fixedstep_file = "$bed_file.fixedstep";
215 $fh_in = Maasha::Filesys::file_read_open( $bed_file );
217 $array = fixedstep_calc_array( $fh_in );
221 $fh_out = Maasha::Filesys::file_write_open( $fixedstep_file );
225 while ( ( $beg, $end ) = fixedstep_scan( $array, $beg ) and $beg != -1 )
227 @block = "fixedStep chrom=$chr start=" . ( $beg + 1 ) . " step=1";
229 for ( $i = $beg; $i < $end; $i++ )
232 push @block, sprintf "%.4f", Maasha::Calc::log10( $array->[ $i ] );
234 push @block, $array->[ $i ];
238 fixedstep_entry_put( \@block, $fh_out );
247 return $fixedstep_file;
251 sub fixedstep_calc_array
253 # Martin A. Hansen, November 2008.
255 # Given a filehandle to a BED file for a single
256 # chromosome fills an array with SCORE for the
257 # begin/end positions of each BED entry.
259 my ( $fh_in, # filehandle to BED file
262 # Returns a bytearray.
264 my ( $bed, $score, @begs, @lens, $i, @array );
266 while ( $bed = Maasha::UCSC::BED::bed_entry_get( $fh_in ) )
268 if ( defined $bed->[ score ] ) {
269 $score = $bed->[ score ];
274 if ( $bed->[ blockCount ] and $bed->[ blockCount ] > 1 )
276 @begs = split ",", $bed->[ blockStarts ];
277 @lens = split ",", $bed->[ blockSizes ];
279 for ( $i = 0; $i < @begs; $i++ ) {
280 map { $array[ $_ ] += $score } ( $bed->[ chromStart ] + $begs[ $i ] .. $bed->[ chromStart ] + $begs[ $i ] + $lens[ $i ] - 1 );
285 map { $array[ $_ ] += $score } ( $bed->[ chromStart ] .. $bed->[ chromEnd ] - 1 );
289 return wantarray ? @array : \@array;
295 # Martin A. Hansen, November 2008.
297 # Given a fixedstep array locates the next block of
298 # non-zero elements from a given begin position.
299 # A [ begin end ] tuple of the block posittion is returned
300 # if a block was found otherwise and empty tuple is returned.
302 my ( $array, # array to scan
303 $beg, # position to start scanning from
306 # Returns an arrayref.
310 while ( $beg < scalar @{ $array } and not $array->[ $beg ] ) { $beg++ }
314 while ( $array->[ $end ] ) { $end++ }
317 return wantarray ? ( $beg, $end ) : [ $beg, $end ];
319 return wantarray ? () : [];
326 # Martin A. Hansen, June 2010.
328 # Indexes a specified fixedstep file and saves the index
329 # to a specified directory and index name.
331 my ( $path, # fixedstep file to index
332 $dir, # direcotory to place index
333 $name, # name of index
340 $index = fixedstep_index_create( $path );
341 fixedstep_index_store( "$dir/$name", $index );
345 sub fixedstep_index_create
347 # Martin A. Hansen, June 2010.
349 # Indexes a specified fixedStep file.
350 # The index consists of a hash with chromosomes as keys,
351 # and a list of { chr_beg, chr_end, index_beg, index_len }
354 my ( $path, # path to fixedStep file
359 my ( $fh, $pos, $line, $index_beg, $index_len, $chr, $step, $chr_beg, $chr_end, $len, %index );
361 $fh = Maasha::Filesys::file_read_open( $path );
369 while ( $line = <$fh> )
371 if ( $line =~ /^fixedStep chrom=([^ ]+) start=(\d+) step=(\d+)/ )
373 if ( $chr_end > $chr_beg )
375 $index_len = $pos - $index_beg;
377 push @{ $index{ $chr } }, {
379 CHR_END => $chr_end - 1,
380 INDEX_BEG => $index_beg,
381 INDEX_LEN => $index_len,
386 $chr_beg = $2 - 1; # fixedStep files are 1-based
390 $index_beg = $pos + length $line;
397 $pos += length $line;
400 $index_len = $pos - $index_beg;
402 push @{ $index{ $chr } }, {
404 CHR_END => $chr_end - 1,
405 INDEX_BEG => $index_beg,
406 INDEX_LEN => $index_len,
411 return wantarray ? %index : \%index;
415 sub fixedstep_index_store
417 # Martin A. Hansen, January 2008.
419 # Writes a fixedStep index to binary file.
421 my ( $path, # full path to file
422 $index, # list with index
427 Maasha::Filesys::file_store( $path, $index );
431 sub fixedstep_index_retrieve
433 # Martin A. Hansen, January 2008.
435 # Retrieves a fixedStep index from binary file.
437 my ( $path, # full path to file
444 $index = Maasha::Filesys::file_retrieve( $path );
446 return wantarray ? %{ $index } : $index;
450 sub fixedstep_index_lookup
452 # Martin A. Hansen, January 2008.
454 # Locate fixedStep entries from index based on
455 # chr, chr_beg and chr_end.
457 my ( $index, # data structure
459 $chr_beg, # chromosome beg
460 $chr_end, # chromosome end
467 if ( exists $index->{ $chr } ) {
468 @subindex = grep { $_->{ 'CHR_END' } >= $chr_beg and $_->{ 'CHR_BEG' } <= $chr_end } @{ $index->{ $chr } };
470 Maasha::Common::error( qq(Chromosome "$chr" not found in index) );
473 return wantarray ? @subindex : \@subindex;
477 sub wiggle_upload_to_ucsc
479 # Martin A. Hansen, May 2008.
481 # Upload a wiggle file to the UCSC database.
483 my ( $tmp_dir, # temporary directory
484 $wib_dir, # wib directory
485 $wig_file, # file to upload,
486 $options, # argument hashref
493 # $args = join " ", "-tmpDir=$tmp_dir", "-pathPrefix=$wib_dir", $options->{ "database" }, $options->{ 'table' }, $wig_file;
495 # Maasha::Common::run( "hgLoadWiggle", "$args > /dev/null 2>&1" );
497 if ( $options->{ 'verbose' } ) {
498 `cd $tmp_dir && hgLoadWiggle -tmpDir=$tmp_dir -pathPrefix=$wib_dir $options->{ 'database' } $options->{ 'table' } $wig_file`;
500 `cd $tmp_dir && hgLoadWiggle -tmpDir=$tmp_dir -pathPrefix=$wib_dir $options->{ 'database' } $options->{ 'table' } $wig_file > /dev/null 2>&1`;
505 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<