3 # Copyright (C) 2007 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 # Stuff for interacting with UCSC genome browser
28 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
33 use vars qw ( @ISA @EXPORT );
56 @ISA = qw( Exporter );
59 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> TRACK FILE <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
62 sub ucsc_config_entry_get
64 # Martin A. Hansen, November 2008.
66 # Given a filehandle to a UCSC Genome Browser
67 # config file (.ra) get the next entry and
68 # return as a hash. Entries are separated by blank
69 # lines. # lines are skipped unless it is the lines:
73 my ( $fh, # file hanlde
78 my ( $line, %record );
80 while ( $line = <$fh> )
84 if ( $line =~ /Track added by 'upload_to_ucsc' (\S+) (\S+)/) {
85 $record{ 'date' } = $1;
86 $record{ 'time' } = $2;
87 } elsif ( $line =~ /^# date: (.+)/ ) {
88 $record{ 'date' } = $1;
89 } elsif ( $line =~ /^# time: (.+)/ ) {
90 $record{ 'time' } = $1;
91 } elsif ( $line =~ /^# (?:database:|Database) (.+)/ ) {
92 $record{ 'database' } = $1;
93 } elsif ( $line =~ /^#/ ) {
95 } elsif ( $line =~ /(\S+)\s+(.+)/ ) {
99 last if $line =~ /^$/ and exists $record{ "track" };
102 return undef if not exists $record{ "track" };
104 return wantarray ? %record : \%record;
108 sub ucsc_config_entry_put
110 # Martin A. Hansen, November 2008.
112 # Outputs a Biopiece record (a hashref)
113 # to a filehandle or STDOUT.
115 my ( $record, # hashref
116 $fh_out, # file handle
123 $fh_out ||= \*STDOUT;
125 ( $date, $time ) = split " ", Maasha::Common::time_stamp();
127 $record->{ "date" } ||= $date;
128 $record->{ "time" } ||= $time;
130 print $fh_out "\n# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n";
132 map { print $fh_out "# $_: $record->{ $_ }\n" if exists $record->{ $_ } } qw( date time database );
133 map { print $fh_out "$_ $record->{ $_ }\n" if exists $record->{ $_ } } qw( track
154 sub ucsc_update_config
156 # Martin A. Hansen, September 2007.
158 # Update the /home/user/ucsc/my_tracks.ra file and executes makeCustomTracks.pl
160 my ( $options, # hashref
166 my ( $file, $entry, %new_entry, $fh_in, $fh_out, $was_found );
168 $file = $ENV{ "HOME" } . "/ucsc/my_tracks.ra";
170 Maasha::Filesys::file_copy( $file, "$file~" ); # create a backup
173 database => $options->{ 'database' },
174 track => $options->{ 'table' },
175 shortLabel => $options->{ 'short_label' },
176 longLabel => $options->{ 'long_label' },
177 group => $options->{ 'group' },
178 priority => $options->{ 'priority' },
179 visibility => $options->{ 'visibility' },
180 color => $options->{ 'color' },
184 $new_entry{ 'useScore' } = 1 if $options->{ 'use_score' };
185 $new_entry{ 'mafTrack' } = "multiz17way" if $type eq "type bed 6 +";
186 $new_entry{ 'maxHeightPixels' } = "50:50:11" if $type eq "wig 0";
188 $fh_in = Maasha::Filesys::file_read_open( "$file~" );
189 $fh_out = Maasha::Filesys::file_write_open( $file );
191 while ( $entry = ucsc_config_entry_get( $fh_in ) )
193 if ( $entry->{ 'database' } eq $new_entry{ 'database' } and $entry->{ 'track' } eq $new_entry{ 'track' } )
195 ucsc_config_entry_put( \%new_entry, $fh_out );
201 ucsc_config_entry_put( $entry, $fh_out );
205 ucsc_config_entry_put( \%new_entry, $fh_out ) if not $was_found;
210 Maasha::Common::run( "ucscMakeTracks.pl", "-b > /dev/null 2>&1" );
214 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PhastCons format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
219 # Martin A. Hansen, July 2008
221 # Create a fixedStep index for PhastCons data.
223 my ( $file, # file to index
224 $dir, # dir with file
231 $index = fixedstep_index_create( "$dir/$file" );
233 fixedstep_index_store( "$dir/$file.index", $index );
237 sub phastcons_parse_entry
239 # Martin A. Hansen, December 2007.
241 # Given a PhastCons entry converts this to a
242 # list of super blocks.
244 # $options->{ "min" } ||= 10;
245 # $options->{ "dist" } ||= 25;
246 # $options->{ "threshold" } ||= 0.8;
247 # $options->{ "gap" } ||= 5;
249 my ( $lines, # list of lines
250 $args, # argument hash
255 my ( $info, $chr, $beg, $step, $i, $c, $j, @blocks, @super_blocks, @entries, $super_block, $block, @lens, @begs );
257 $info = shift @{ $lines };
259 if ( $info =~ /^chrom=([^ ]+) start=(\d+) step=(\d+)$/ )
265 die qq(ERROR: step size $step != 1 -> problem!\n) if $step != 1; # in an ideal world should would be fixed ...
270 while ( $i < @{ $lines } )
272 if ( $lines->[ $i ] >= $args->{ "threshold" } )
276 while ( $c < @{ $lines } )
278 if ( $lines->[ $c ] < $args->{ "threshold" } )
282 while ( $j < @{ $lines } and $lines->[ $j ] < $args->{ "threshold" } ) {
286 if ( $j - $c > $args->{ "gap" } )
288 if ( $c - $i >= $args->{ "min" } )
292 CHR_BEG => $beg + $i - 1,
293 CHR_END => $beg + $c - 2,
311 if ( $c - $i >= $args->{ "min" } )
315 CHR_BEG => $beg + $i - 1,
316 CHR_END => $beg + $c - 2,
331 while ( $i < @blocks )
335 while ( $c < @blocks and $blocks[ $c ]->{ "CHR_BEG" } - $blocks[ $c - 1 ]->{ "CHR_END" } <= $args->{ "dist" } )
340 push @super_blocks, [ @blocks[ $i .. $c - 1 ] ];
345 foreach $super_block ( @super_blocks )
347 foreach $block ( @{ $super_block } )
349 push @begs, $block->{ "CHR_BEG" } - $super_block->[ 0 ]->{ "CHR_BEG" };
350 push @lens, $block->{ "CHR_LEN" } - 1;
356 CHR => $super_block->[ 0 ]->{ "CHR" },
357 CHR_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
358 CHR_END => $super_block->[ -1 ]->{ "CHR_END" },
362 THICK_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
363 THICK_END => $super_block->[ -1 ]->{ "CHR_END" } + 1,
365 BLOCK_COUNT => scalar @{ $super_block },
366 BLOCK_LENS => join( ",", @lens ),
367 Q_BEGS => join( ",", @begs ),
374 return wantarray ? @entries : \@entries;
378 sub phastcons_normalize
380 # Martin A. Hansen, January 2008.
382 # Normalizes a list of lists with PhastCons scores,
383 # in such a way that each list contains the same number
384 # of PhastCons scores.
386 my ( $AoA, # AoA with PhastCons scores
391 my ( $list, $max, $min, $mean, $diff );
396 foreach $list ( @{ $AoA } )
398 $min = scalar @{ $list } if scalar @{ $list } < $min;
399 $max = scalar @{ $list } if scalar @{ $list } > $max;
402 $mean = int( ( $min + $max ) / 2 );
404 # print STDERR "min->$min max->$max mean->$mean\n";
406 foreach $list ( @{ $AoA } )
408 $diff = scalar @{ $list } - $mean;
410 phastcons_list_inflate( $list, abs( $diff ) ) if $diff < 0;
411 phastcons_list_deflate( $list, $diff ) if $diff > 0;
414 return wantarray ? @{ $AoA } : $AoA;
418 sub phastcons_list_inflate
420 # Martin A. Hansen, January 2008.
422 # Inflates a list with a given number of elements
423 # in such a way that the extra elements are introduced
424 # evenly over the entire length of the list. The value
425 # of the extra elements is based on a mean of the
428 my ( $list, # list of elements
429 $diff, # number of elements to introduce
434 my ( $len, $space, $i, $pos );
436 $len = scalar @{ $list };
438 $space = $len / $diff;
440 for ( $i = 0; $i < $diff; $i++ )
442 $pos = int( ( $space / 2 ) + $i * $space );
444 splice @{ $list }, $pos, 0, ( $list->[ $pos - 1 ] + $list->[ $pos + 1 ] ) / 2;
445 # splice @{ $list }, $pos, 0, "X";
448 die qq(ERROR: Bad inflate\n) if scalar @{ $list } != $len + $diff;
452 sub phastcons_list_deflate
454 # Martin A. Hansen, January 2008.
456 # Deflates a list by removing a given number of elements
457 # evenly distributed over the entire list.
459 my ( $list, # list of elements
460 $diff, # number of elements to remove
465 my ( $len, $space, $i, $pos );
467 $len = scalar @{ $list };
469 $space = ( $len - $diff ) / $diff;
471 for ( $i = 0; $i < $diff; $i++ )
473 $pos = int( ( $space / 2 ) + $i * $space );
475 splice @{ $list }, $pos, 1;
478 die qq(ERROR: Dad deflate\n) if scalar @{ $list } != $len - $diff;
484 # Martin A. Hansen, January 2008.
486 # Given a normalized PhastCons matrix in an AoA,
487 # calculate the mean for each column and return as a list.
489 my ( $AoA, # AoA with normalized PhastCons scores
496 $AoA = Maasha::Matrix::matrix_flip( $AoA );
498 map { push @list, Maasha::Calc::mean( $_ ) } @{ $AoA };
500 return wantarray ? @list : \@list;
506 # Martin A. Hansen, January 2008.
508 # Given a normalized PhastCons matrix in an AoA,
509 # calculate the median for each column and return as a list.
511 my ( $AoA, # AoA with normalized PhastCons scores
518 $AoA = Maasha::Matrix::matrix_flip( $AoA );
520 map { push @list, Maasha::Calc::median( $_ ) } @{ $AoA };
522 return wantarray ? @list : \@list;
526 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> MULTIPLE ALIGNMENT FILES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
531 # Martin A. Hansen, April 2008.
533 # Executes mafFrag to extract a subalignment from a multiz track
534 # in the UCSC genome browser database.
536 my ( $tmp_dir, # temporary directory
537 $database, # genome database
538 $table, # table with the multiz track
540 $beg, # begin position
545 # Returns a list of record
547 my ( $tmp_file, $align );
549 $tmp_file = "$tmp_dir/maf_extract.maf";
551 Maasha::Common::run( "mafFrag", "$database $table $chr $beg $end $strand $tmp_file" );
553 $align = maf_parse( $tmp_file );
557 return wantarray ? @{ $align } : $align;
563 # Martin A. Hansen, April 2008.
566 my ( $path, # full path to MAF file
569 # Returns a list of record.
571 my ( $fh, $line, @fields, @align );
573 $fh = Maasha::Filesys::file_read_open( $path );
575 while ( $line = <$fh> )
581 @fields = split / /, $line;
584 SEQ_NAME => $fields[ 1 ],
585 SEQ => $fields[ -1 ],
587 ALIGN_LEN => length $fields[ -1 ],
594 return wantarray ? @align : \@align;
598 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> MySQL CONF <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
603 # Martin A. Hansen, May 2008
605 # Fetches the MySQL database user name from the
606 # .hg.conf file in the users home directory.
610 my ( $file, $fh, $line, $user );
612 $file = "$ENV{ 'HOME' }/.hg.conf";
614 return if not -f $file;
616 $fh = Maasha::Filesys::file_read_open( $file );
618 while ( $line = <$fh> )
622 if ( $line =~ /^db\.user=(.+)/ )
636 sub ucsc_get_password
638 # Martin A. Hansen, May 2008
640 # Fetches the MySQL database password from the
641 # .hg.conf file in the users home directory.
645 my ( $file, $fh, $line, $password );
647 $file = "$ENV{ 'HOME' }/.hg.conf";
649 return if not -f $file;
651 $fh = Maasha::Filesys::file_read_open( $file );
653 while ( $line = <$fh> )
657 if ( $line =~ /^db\.password=(.+)/ )
671 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<