1 package Maasha::UCSC::Wiggle;
3 # Copyright (C) 2007-2008 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 # Routines for manipulation of wiggle data.
27 # http://genome.ucsc.edu/goldenPath/help/wiggle.html
30 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
39 use vars qw( @ISA @EXPORT_OK );
43 @ISA = qw( Exporter );
50 chrom => 0, # BED field names
65 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> FixedStep format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
68 sub fixedstep_entry_get
70 # Martin A. Hansen, December 2007.
72 # Given a file handle to a PhastCons file get the
73 # next entry which is all the lines after a "fixedStep"
74 # line and until the next "fixedStep" line or EOF.
76 my ( $fh, # filehandle
79 # Returns a list of lines
81 my ( $entry, @lines );
83 local $/ = "\nfixedStep ";
89 @lines = split "\n", $entry;
91 return if @lines == 0;
93 $lines[ 0 ] =~ s/fixedStep?\s*//;
94 $lines[ 0 ] = 'fixedStep ' . $lines[ 0 ];
96 return wantarray ? @lines : \@lines;
100 sub fixedstep_entry_put
102 # Martin A. Hansen, April 2008.
104 # Outputs a block of fixedStep values.
105 # Used for outputting wiggle data.
107 my ( $chr, # chromosome
108 $beg, # start position
109 $block, # list of scores
110 $fh, # filehandle - OPTIONAL
115 $beg += 1; # fixedStep format is 1 based.
119 print $fh "fixedStep chrom=$chr start=$beg step=1\n";
121 map { print $fh "$_\n" } @{ $block };
125 sub fixedstep2biopiece
127 # Martin A. Hansen, November 2008.
129 # Converts a fixedstep entry to a Biopiece record.
131 my ( $entry, # fixedstep entry arrayref
136 my ( $head, %record );
138 $head = shift @{ $entry };
140 if ( $head =~ /^fixedStep chrom=([^ ]+) start=(\d+) step=(\d+)$/ )
142 $record{ "REC_TYPE" } = "fixed_step";
143 $record{ "CHR" } = $1;
144 $record{ "CHR_BEG" } = $2;
145 $record{ "STEP" } = $3;
146 $record{ "VALS" } = join ";", @{ $entry };
150 Maasha::Common::error( qq(could not convert fixedStep entry to Biopiece record) );
153 return wantarray ? %record : \%record;
157 sub biopiece2fixedstep
159 # Martin A. Hansen, November 2008.
161 # Converts a Biopiece record to a fixedStep entry.
163 my ( $record, # Biopiece record
168 my ( @entry, $vals );
170 if ( exists $record->{ "REC_TYPE" } and $record->{ "REC_TYPE" } eq 'fixed_step' )
172 if ( exists $record->{ 'CHR' } and exists $record->{ 'CHR_BEG' } and exists $record->{ 'STEP' } )
174 push @entry, "fixedStep chrom=$record->{ 'CHR' } start=$record->{ 'CHR_BEG' } step=$record->{ 'STEP' }";
176 $vals = $record->{ 'VALS' };
178 map { push @entry, $_ } split ";", $vals;
182 Maasha::Common::error( qq(could not convert Biopiece record to fixedStep) );
186 return wantarray ? @entry : \@entry;
192 # Martin A. Hansen, November 2008.
194 # Calculates fixedstep entries for a given BED file.
195 # Implemented using large Perl arrays to avoid sorting.
196 # Returns path to the fixedstep file.
198 my ( $bed_file, # path to BED file
199 $chr, # chromosome name
200 $use_score, # flag indicating that the score field should be used - OPTIONAL
205 my ( $fixedstep_file, $fh_in, $fh_out, $array, $beg, $end, @block );
207 $fixedstep_file = "$bed_file.fixedstep";
209 $fh_in = Maasha::Filesys::file_read_open( $bed_file );
211 $array = fixedstep_calc_array( $fh_in, $use_score );
215 $fh_out = Maasha::Filesys::file_write_open( $fixedstep_file );
219 while ( ( $beg, $end ) = fixedstep_scan( $array, $beg ) and $beg )
221 @block = @{ $array }[ $beg .. $end ];
223 fixedstep_entry_put( $chr, $beg, \@block, $fh_out );
230 return $fixedstep_file;
234 sub fixedstep_calc_array
236 # Martin A. Hansen, November 2008.
238 # Given a filehandle to a BED file for a single
239 # chromosome fills an array with scores based on
240 # clone count (or sequence read count) for the
241 # begin position through the end position of each
244 my ( $fh_in, # filehandle to BED file
245 $use_score, # flag indicating that the score field should be used - OPTIONAL
250 my ( $bed, $clones, @array );
252 while ( $bed = Maasha::UCSC::BED::bed_entry_get( $fh_in, 5 ) )
255 $clones = $bed->[ score ];
256 } elsif ( $bed->[ name ] =~ /_(\d+)$/ ) {
262 map { $array[ $_ ] += $clones } $bed->[ chromStart ] .. $bed->[ chromEnd ] - 1;
265 return wantarray ? @array : \@array;
271 # Martin A. Hansen, November 2008.
273 # Given a fixedstep array locates the next block of
274 # non-zero elements from a given begin position.
275 # A [ begin end ] tuple of the block posittion is returned
276 # if a block was found otherwise and empty tuple is returned.
278 my ( $array, # array to scan
279 $beg, # position to start scanning from
282 # Returns an arrayref.
286 while ( $beg < scalar @{ $array } and not $array->[ $beg ] ) { $beg++ }
290 while ( $array->[ $end ] ) { $end++ }
293 return wantarray ? ( $beg, $end ) : [ $beg, $end ];
295 return wantarray ? () : [];
300 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<