1 package Maasha::UCSC::Wiggle;
3 # Copyright (C) 2007-2008 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 # Routines for manipulation of wiggle data.
27 # http://genome.ucsc.edu/goldenPath/help/wiggle.html
30 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
40 use vars qw( @ISA @EXPORT_OK );
44 @ISA = qw( Exporter );
51 BITS => 32, # Number of bits in an integer
52 SEQ_MAX => 200_000_000, # Maximum sequence size
53 chrom => 0, # BED field names
68 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> FixedStep format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
71 sub fixedstep_entry_get
73 # Martin A. Hansen, December 2007.
75 # Given a file handle to a PhastCons file get the
76 # next entry which is all the lines after a "fixedStep"
77 # line and until the next "fixedStep" line or EOF.
79 my ( $fh, # filehandle
82 # Returns a list of lines
84 my ( $entry, @lines );
86 local $/ = "\nfixedStep ";
94 @lines = split "\n", $entry;
96 return if @lines == 0;
98 $lines[ 0 ] =~ s/fixedStep?\s*//;
99 $lines[ 0 ] = 'fixedStep ' . $lines[ 0 ];
101 return wantarray ? @lines : \@lines;
105 sub fixedstep_entry_put
107 # Martin A. Hansen, April 2008.
109 # Outputs a block of fixedStep values.
110 # Used for outputting wiggle data.
112 my ( $entry, # fixedStep entry
113 $fh, # filehandle - OPTIONAL
120 print $fh join( "\n", @{ $entry } ), "\n";
124 sub fixedstep2biopiece
126 # Martin A. Hansen, November 2008.
128 # Converts a fixedstep entry to a Biopiece record.
130 my ( $entry, # fixedstep entry arrayref
135 my ( $head, %record );
137 $head = shift @{ $entry };
139 if ( $head =~ /^fixedStep chrom=([^ ]+) start=(\d+) step=(\d+)$/ )
141 $record{ "REC_TYPE" } = "fixed_step";
142 $record{ "CHR" } = $1;
143 $record{ "CHR_BEG" } = $2 - 1; # fixedStep is 1-based
144 $record{ "STEP" } = $3;
145 $record{ "VALS" } = join ";", @{ $entry };
149 Maasha::Common::error( qq(could not convert fixedStep entry to Biopiece record) );
152 return wantarray ? %record : \%record;
156 sub biopiece2fixedstep
158 # Martin A. Hansen, November 2008.
160 # Converts a Biopiece record to a fixedStep entry.
162 my ( $record, # Biopiece record
167 my ( @entry, $beg, $vals );
169 if ( exists $record->{ "REC_TYPE" } and $record->{ "REC_TYPE" } eq 'fixed_step' )
171 if ( exists $record->{ 'CHR' } and exists $record->{ 'CHR_BEG' } and exists $record->{ 'STEP' } )
173 $beg = $record->{ 'CHR_BEG' } + 1; # fixedStep is 1-based
174 push @entry, "fixedStep chrom=$record->{ 'CHR' } start=$beg step=$record->{ 'STEP' }";
176 $vals = $record->{ 'VALS' };
178 map { push @entry, $_ } split ";", $vals;
182 Maasha::Common::error( qq(could not convert Biopiece record to fixedStep) );
186 return wantarray ? @entry : \@entry;
192 # Martin A. Hansen, November 2008.
194 # Calculates fixedstep entries for a given BED file.
195 # Implemented using large Perl arrays to avoid sorting.
196 # Returns path to the fixedstep file.
198 my ( $bed_file, # path to BED file
199 $chr, # chromosome name
200 $use_score, # flag indicating that the score field should be used - OPTIONAL
201 $use_log10, # flag indicating that the score should be in log10 - OPTIONAL
206 my ( $fixedstep_file, $fh_in, $fh_out, $array, $beg, $end, @block, $i );
208 $fixedstep_file = "$bed_file.fixedstep";
210 $fh_in = Maasha::Filesys::file_read_open( $bed_file );
212 $array = fixedstep_calc_array( $fh_in, $use_score );
216 $fh_out = Maasha::Filesys::file_write_open( $fixedstep_file );
220 while ( ( $beg, $end ) = fixedstep_scan( $array, $beg ) and $beg != -1 )
222 @block = "fixedStep chrom=$chr start=" . ( $beg + 1 ) . " step=1";
224 for ( $i = $beg; $i < $end; $i++ )
227 push @block, sprintf "%.4f", Maasha::Calc::log10( $array->[ $i ] );
229 push @block, $array->[ $i ];
233 fixedstep_entry_put( \@block, $fh_out );
242 return $fixedstep_file;
246 sub fixedstep_calc_array
248 # Martin A. Hansen, November 2008.
250 # Given a filehandle to a BED file for a single
251 # chromosome fills an array with scores based on
252 # clone count (or sequence read count) for the
253 # begin position through the end position of each
256 my ( $fh_in, # filehandle to BED file
257 $use_score, # flag indicating that the score field should be used - OPTIONAL
260 # Returns a bitarray.
262 my ( $bed, $clones, @begs, @lens, $i, @array );
264 while ( $bed = Maasha::UCSC::BED::bed_entry_get( $fh_in ) )
267 $clones = $bed->[ score ];
268 } elsif ( defined $bed->[ name ] and $bed->[ name ] =~ /_(\d+)$/ ) {
274 if ( $bed->[ blockCount ] and $bed->[ blockCount ] > 1 )
276 @begs = split ",", $bed->[ blockStarts ];
277 @lens = split ",", $bed->[ blockSizes ];
279 for ( $i = 0; $i < @begs; $i++ ) {
280 map { $array[ $_ ] += $clones } ( $bed->[ chromStart ] + $begs[ $i ] .. $bed->[ chromStart ] + $begs[ $i ] + $lens[ $i ] - 1 );
285 map { $array[ $_ ] += $clones } ( $bed->[ chromStart ] .. $bed->[ chromEnd ] - 1 );
289 return wantarray ? @array : \@array;
295 # Martin A. Hansen, November 2008.
297 # Given a fixedstep array locates the next block of
298 # non-zero elements from a given begin position.
299 # A [ begin end ] tuple of the block posittion is returned
300 # if a block was found otherwise and empty tuple is returned.
302 my ( $array, # array to scan
303 $beg, # position to start scanning from
306 # Returns an arrayref.
310 while ( $beg < scalar @{ $array } and not $array->[ $beg ] ) { $beg++ }
314 while ( $array->[ $end ] ) { $end++ }
317 return wantarray ? ( $beg, $end ) : [ $beg, $end ];
319 return wantarray ? () : [];
324 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<