1 package Maasha::UCSC::Wiggle;
3 # Copyright (C) 2007-2008 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 # Routines for manipulation of wiggle data.
27 # http://genome.ucsc.edu/goldenPath/help/wiggle.html
30 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
41 use vars qw( @ISA @EXPORT_OK );
45 @ISA = qw( Exporter );
52 BITS => 32, # Number of bits in an integer
53 SEQ_MAX => 200_000_000, # Maximum sequence size
54 chrom => 0, # BED field names
69 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> FixedStep format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
72 sub fixedstep_entry_get
74 # Martin A. Hansen, December 2007.
76 # Given a file handle to a PhastCons file get the
77 # next entry which is all the lines after a "fixedStep"
78 # line and until the next "fixedStep" line or EOF.
80 my ( $fh, # filehandle
83 # Returns a list of lines
85 my ( $entry, @lines );
87 local $/ = "\nfixedStep ";
95 @lines = split "\n", $entry;
97 return if @lines == 0;
99 $lines[ 0 ] =~ s/fixedStep?\s*//;
100 $lines[ 0 ] = 'fixedStep ' . $lines[ 0 ];
102 return wantarray ? @lines : \@lines;
106 sub fixedstep_entry_put
108 # Martin A. Hansen, April 2008.
110 # Outputs a block of fixedStep values.
111 # Used for outputting wiggle data.
113 my ( $entry, # fixedStep entry
114 $fh, # filehandle - OPTIONAL
121 print $fh join( "\n", @{ $entry } ), "\n";
125 sub fixedstep2biopiece
127 # Martin A. Hansen, November 2008.
129 # Converts a fixedstep entry to a Biopiece record.
131 my ( $entry, # fixedstep entry arrayref
136 my ( $head, %record );
138 $head = shift @{ $entry };
140 if ( $head =~ /^fixedStep chrom=([^ ]+) start=(\d+) step=(\d+)$/ )
142 $record{ "REC_TYPE" } = "fixed_step";
143 $record{ "CHR" } = $1;
144 $record{ "CHR_BEG" } = $2 - 1; # fixedStep is 1-based
145 $record{ "STEP" } = $3;
146 $record{ "VALS" } = join ";", @{ $entry };
150 Maasha::Common::error( qq(could not convert fixedStep entry to Biopiece record) );
153 return wantarray ? %record : \%record;
157 sub biopiece2fixedstep
159 # Martin A. Hansen, November 2008.
161 # Converts a Biopiece record to a fixedStep entry.
163 my ( $record, # Biopiece record
168 my ( @entry, $beg, $vals );
170 if ( exists $record->{ "REC_TYPE" } and $record->{ "REC_TYPE" } eq 'fixed_step' )
172 if ( exists $record->{ 'CHR' } and exists $record->{ 'CHR_BEG' } and exists $record->{ 'STEP' } )
174 $beg = $record->{ 'CHR_BEG' } + 1; # fixedStep is 1-based
175 push @entry, "fixedStep chrom=$record->{ 'CHR' } start=$beg step=$record->{ 'STEP' }";
177 $vals = $record->{ 'VALS' };
179 map { push @entry, $_ } split ";", $vals;
183 Maasha::Common::error( qq(could not convert Biopiece record to fixedStep) );
187 return wantarray ? @entry : \@entry;
193 # Martin A. Hansen, November 2008.
195 # Calculates fixedstep entries for a given BED file.
196 # Implemented using large Perl arrays to avoid sorting.
197 # Returns path to the fixedstep file.
199 my ( $bed_file, # path to BED file
200 $chr, # chromosome name
201 $log10, # flag indicating that the score should be in log10 - OPTIONAL
206 my ( $fixedstep_file, $fh_in, $fh_out, $array, $beg, $end, @block, $i );
208 $fixedstep_file = "$bed_file.fixedstep";
210 $fh_in = Maasha::Filesys::file_read_open( $bed_file );
212 $array = fixedstep_calc_array( $fh_in );
216 $fh_out = Maasha::Filesys::file_write_open( $fixedstep_file );
220 while ( ( $beg, $end ) = fixedstep_scan( $array, $beg ) and $beg != -1 )
222 @block = "fixedStep chrom=$chr start=" . ( $beg + 1 ) . " step=1";
224 for ( $i = $beg; $i < $end; $i++ )
227 push @block, sprintf "%.4f", Maasha::Calc::log10( $array->[ $i ] );
229 push @block, $array->[ $i ];
233 fixedstep_entry_put( \@block, $fh_out );
242 return $fixedstep_file;
246 sub fixedstep_calc_array
248 # Martin A. Hansen, November 2008.
250 # Given a filehandle to a BED file for a single
251 # chromosome fills an array with SCORE for the
252 # begin/end positions of each BED entry.
254 my ( $fh_in, # filehandle to BED file
257 # Returns a bytearray.
259 my ( $bed, $score, @begs, @lens, $i, @array );
261 while ( $bed = Maasha::UCSC::BED::bed_entry_get( $fh_in ) )
263 if ( defined $bed->[ score ] ) {
264 $score = $bed->[ score ];
269 if ( $bed->[ blockCount ] and $bed->[ blockCount ] > 1 )
271 @begs = split ",", $bed->[ blockStarts ];
272 @lens = split ",", $bed->[ blockSizes ];
274 for ( $i = 0; $i < @begs; $i++ ) {
275 map { $array[ $_ ] += $score } ( $bed->[ chromStart ] + $begs[ $i ] .. $bed->[ chromStart ] + $begs[ $i ] + $lens[ $i ] - 1 );
280 map { $array[ $_ ] += $score } ( $bed->[ chromStart ] .. $bed->[ chromEnd ] - 1 );
284 return wantarray ? @array : \@array;
290 # Martin A. Hansen, November 2008.
292 # Given a fixedstep array locates the next block of
293 # non-zero elements from a given begin position.
294 # A [ begin end ] tuple of the block posittion is returned
295 # if a block was found otherwise and empty tuple is returned.
297 my ( $array, # array to scan
298 $beg, # position to start scanning from
301 # Returns an arrayref.
305 while ( $beg < scalar @{ $array } and not $array->[ $beg ] ) { $beg++ }
309 while ( $array->[ $end ] ) { $end++ }
312 return wantarray ? ( $beg, $end ) : [ $beg, $end ];
314 return wantarray ? () : [];
319 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<