1 package Maasha::UCSC::Wiggle;
3 # Copyright (C) 2007-2008 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 # Routines for manipulation of wiggle data.
27 # http://genome.ucsc.edu/goldenPath/help/wiggle.html
30 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
40 use vars qw( @ISA @EXPORT_OK );
44 @ISA = qw( Exporter );
51 BITS => 32, # Number of bits in an integer
52 SEQ_MAX => 200_000_000, # Maximum sequence size
53 chrom => 0, # BED field names
68 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> FixedStep format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
71 sub fixedstep_entry_get
73 # Martin A. Hansen, December 2007.
75 # Given a file handle to a PhastCons file get the
76 # next entry which is all the lines after a "fixedStep"
77 # line and until the next "fixedStep" line or EOF.
79 my ( $fh, # filehandle
82 # Returns a list of lines
84 my ( $entry, @lines );
86 local $/ = "\nfixedStep ";
92 @lines = split "\n", $entry;
94 return if @lines == 0;
96 $lines[ 0 ] =~ s/fixedStep?\s*//;
97 $lines[ 0 ] = 'fixedStep ' . $lines[ 0 ];
99 return wantarray ? @lines : \@lines;
103 sub fixedstep_entry_put
105 # Martin A. Hansen, April 2008.
107 # Outputs a block of fixedStep values.
108 # Used for outputting wiggle data.
110 my ( $entry, # fixedStep entry
111 $fh, # filehandle - OPTIONAL
118 print $fh join( "\n", @{ $entry } ), "\n";
122 sub fixedstep2biopiece
124 # Martin A. Hansen, November 2008.
126 # Converts a fixedstep entry to a Biopiece record.
128 my ( $entry, # fixedstep entry arrayref
133 my ( $head, %record );
135 $head = shift @{ $entry };
137 if ( $head =~ /^fixedStep chrom=([^ ]+) start=(\d+) step=(\d+)$/ )
139 $record{ "REC_TYPE" } = "fixed_step";
140 $record{ "CHR" } = $1;
141 $record{ "CHR_BEG" } = $2 - 1; # fixedStep is 1-based
142 $record{ "STEP" } = $3;
143 $record{ "VALS" } = join ";", @{ $entry };
147 Maasha::Common::error( qq(could not convert fixedStep entry to Biopiece record) );
150 return wantarray ? %record : \%record;
154 sub biopiece2fixedstep
156 # Martin A. Hansen, November 2008.
158 # Converts a Biopiece record to a fixedStep entry.
160 my ( $record, # Biopiece record
165 my ( @entry, $beg, $vals );
167 if ( exists $record->{ "REC_TYPE" } and $record->{ "REC_TYPE" } eq 'fixed_step' )
169 if ( exists $record->{ 'CHR' } and exists $record->{ 'CHR_BEG' } and exists $record->{ 'STEP' } )
171 $beg = $record->{ 'CHR_BEG' } + 1; # fixedStep is 1-based
172 push @entry, "fixedStep chrom=$record->{ 'CHR' } start=$beg step=$record->{ 'STEP' }";
174 $vals = $record->{ 'VALS' };
176 map { push @entry, $_ } split ";", $vals;
180 Maasha::Common::error( qq(could not convert Biopiece record to fixedStep) );
184 return wantarray ? @entry : \@entry;
190 # Martin A. Hansen, November 2008.
192 # Calculates fixedstep entries for a given BED file.
193 # Implemented using large Perl arrays to avoid sorting.
194 # Returns path to the fixedstep file.
196 my ( $bed_file, # path to BED file
197 $chr, # chromosome name
198 $use_score, # flag indicating that the score field should be used - OPTIONAL
199 $use_log10, # flag indicating that the score should be in log10 - OPTIONAL
204 my ( $fixedstep_file, $fh_in, $fh_out, $array, $beg, $end, @block, $i );
206 $fixedstep_file = "$bed_file.fixedstep";
208 $fh_in = Maasha::Filesys::file_read_open( $bed_file );
210 $array = fixedstep_calc_array( $fh_in, $use_score );
214 $fh_out = Maasha::Filesys::file_write_open( $fixedstep_file );
218 while ( ( $beg, $end ) = fixedstep_scan( $array, $beg ) and $beg != -1 )
220 @block = "fixedStep chrom=$chr start=" . ( $beg + 1 ) . " step=1";
222 for ( $i = $beg; $i <= $end; $i++ )
225 push @block, sprintf "%.4f", Maasha::Calc::log10( $array->[ $i ] );
227 push @block, $array->[ $i ];
231 fixedstep_entry_put( \@block, $fh_out );
240 return $fixedstep_file;
244 sub fixedstep_calc_array
246 # Martin A. Hansen, November 2008.
248 # Given a filehandle to a BED file for a single
249 # chromosome fills an array with scores based on
250 # clone count (or sequence read count) for the
251 # begin position through the end position of each
254 my ( $fh_in, # filehandle to BED file
255 $use_score, # flag indicating that the score field should be used - OPTIONAL
258 # Returns a bitarray.
260 my ( $bed, $clones, @begs, @lens, $i, @array );
262 while ( $bed = Maasha::UCSC::BED::bed_entry_get( $fh_in ) )
265 $clones = $bed->[ score ];
266 } elsif ( $bed->[ name ] =~ /_(\d+)$/ ) {
272 if ( $bed->[ blockCount ] and $bed->[ blockCount ] > 1 )
274 @begs = split ",", $bed->[ blockStarts ];
275 @lens = split ",", $bed->[ blockSizes ];
277 for ( $i = 0; $i < @begs; $i++ ) {
278 map { $array[ $_ ] += $clones } ( $bed->[ chromStart ] + $begs[ $i ] .. $bed->[ chromStart ] + $begs[ $i ] + $lens[ $i ] - 1 );
283 map { $array[ $_ ] += $clones } ( $bed->[ chromStart ] .. $bed->[ chromEnd ] - 1 );
287 return wantarray ? @array : \@array;
293 # Martin A. Hansen, November 2008.
295 # Given a fixedstep array locates the next block of
296 # non-zero elements from a given begin position.
297 # A [ begin end ] tuple of the block posittion is returned
298 # if a block was found otherwise and empty tuple is returned.
300 my ( $array, # array to scan
301 $beg, # position to start scanning from
304 # Returns an arrayref.
308 while ( $beg < scalar @{ $array } and not $array->[ $beg ] ) { $beg++ }
312 while ( $array->[ $end ] ) { $end++ }
315 return wantarray ? ( $beg, $end ) : [ $beg, $end ];
317 return wantarray ? () : [];
322 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<