1 package Maasha::UCSC::Wiggle;
3 # Copyright (C) 2007-2008 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 # Routines for manipulation of wiggle data.
27 # http://genome.ucsc.edu/goldenPath/help/wiggle.html
30 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
40 use vars qw( @ISA @EXPORT_OK );
44 @ISA = qw( Exporter );
51 chrom => 0, # BED field names
66 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> FixedStep format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
69 sub fixedstep_entry_get
71 # Martin A. Hansen, December 2007.
73 # Given a file handle to a PhastCons file get the
74 # next entry which is all the lines after a "fixedStep"
75 # line and until the next "fixedStep" line or EOF.
77 my ( $fh, # filehandle
80 # Returns a list of lines
82 my ( $entry, @lines );
84 local $/ = "\nfixedStep ";
90 @lines = split "\n", $entry;
92 return if @lines == 0;
94 $lines[ 0 ] =~ s/fixedStep?\s*//;
95 $lines[ 0 ] = 'fixedStep ' . $lines[ 0 ];
97 return wantarray ? @lines : \@lines;
101 sub fixedstep_entry_put
103 # Martin A. Hansen, April 2008.
105 # Outputs a block of fixedStep values.
106 # Used for outputting wiggle data.
108 my ( $entry, # fixedStep entry
109 $fh, # filehandle - OPTIONAL
116 map { print $fh "$_\n" } @{ $entry };
120 sub fixedstep2biopiece
122 # Martin A. Hansen, November 2008.
124 # Converts a fixedstep entry to a Biopiece record.
126 my ( $entry, # fixedstep entry arrayref
131 my ( $head, %record );
133 $head = shift @{ $entry };
135 if ( $head =~ /^fixedStep chrom=([^ ]+) start=(\d+) step=(\d+)$/ )
137 $record{ "REC_TYPE" } = "fixed_step";
138 $record{ "CHR" } = $1;
139 $record{ "CHR_BEG" } = $2 - 1; # fixedStep is 1-based
140 $record{ "STEP" } = $3;
141 $record{ "VALS" } = join ";", @{ $entry };
145 Maasha::Common::error( qq(could not convert fixedStep entry to Biopiece record) );
148 return wantarray ? %record : \%record;
152 sub biopiece2fixedstep
154 # Martin A. Hansen, November 2008.
156 # Converts a Biopiece record to a fixedStep entry.
158 my ( $record, # Biopiece record
163 my ( @entry, $beg, $vals );
165 if ( exists $record->{ "REC_TYPE" } and $record->{ "REC_TYPE" } eq 'fixed_step' )
167 if ( exists $record->{ 'CHR' } and exists $record->{ 'CHR_BEG' } and exists $record->{ 'STEP' } )
169 $beg = $record->{ 'CHR_BEG' } + 1; # fixedStep is 1-based
170 push @entry, "fixedStep chrom=$record->{ 'CHR' } start=$beg step=$record->{ 'STEP' }";
172 $vals = $record->{ 'VALS' };
174 map { push @entry, $_ } split ";", $vals;
178 Maasha::Common::error( qq(could not convert Biopiece record to fixedStep) );
182 return wantarray ? @entry : \@entry;
188 # Martin A. Hansen, November 2008.
190 # Calculates fixedstep entries for a given BED file.
191 # Implemented using large Perl arrays to avoid sorting.
192 # Returns path to the fixedstep file.
194 my ( $bed_file, # path to BED file
195 $chr, # chromosome name
196 $use_score, # flag indicating that the score field should be used - OPTIONAL
197 $use_log10, # flag indicating that the score should be in log10 - OPTIONAL
202 my ( $fixedstep_file, $fh_in, $fh_out, $array, $beg, $end, @block );
204 $fixedstep_file = "$bed_file.fixedstep";
206 $fh_in = Maasha::Filesys::file_read_open( $bed_file );
208 $array = fixedstep_calc_array( $fh_in, $use_score );
212 $fh_out = Maasha::Filesys::file_write_open( $fixedstep_file );
216 while ( ( $beg, $end ) = fixedstep_scan( $array, $beg ) and $beg )
218 @block = @{ $array }[ $beg .. $end - 1 ];
220 map { $_ = sprintf "%.4f", Maasha::Calc::log10( $_ ) } @block if $use_log10;
222 unshift @block, "fixedStep chrom=$chr start=" . ( $beg + 1 ) . " step=1";
224 fixedstep_entry_put( \@block, $fh_out );
231 return $fixedstep_file;
235 sub fixedstep_calc_array
237 # Martin A. Hansen, November 2008.
239 # Given a filehandle to a BED file for a single
240 # chromosome fills an array with scores based on
241 # clone count (or sequence read count) for the
242 # begin position through the end position of each
245 my ( $fh_in, # filehandle to BED file
246 $use_score, # flag indicating that the score field should be used - OPTIONAL
251 my ( $bed, $clones, @array );
253 while ( $bed = Maasha::UCSC::BED::bed_entry_get( $fh_in, 5 ) )
256 $clones = $bed->[ score ];
257 } elsif ( $bed->[ name ] =~ /_(\d+)$/ ) {
263 map { $array[ $_ ] += $clones } $bed->[ chromStart ] .. $bed->[ chromEnd ] - 1;
266 return wantarray ? @array : \@array;
272 # Martin A. Hansen, November 2008.
274 # Given a fixedstep array locates the next block of
275 # non-zero elements from a given begin position.
276 # A [ begin end ] tuple of the block posittion is returned
277 # if a block was found otherwise and empty tuple is returned.
279 my ( $array, # array to scan
280 $beg, # position to start scanning from
283 # Returns an arrayref.
287 while ( $beg < scalar @{ $array } and not $array->[ $beg ] ) { $beg++ }
291 while ( $array->[ $end ] ) { $end++ }
294 return wantarray ? ( $beg, $end ) : [ $beg, $end ];
296 return wantarray ? () : [];
301 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<