3 # Copyright (C) 2007 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 # Stuff for interacting with UCSC genome browser
28 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
32 use vars qw ( @ISA @EXPORT );
35 use Time::HiRes qw( gettimeofday );
62 @ISA = qw( Exporter );
64 my $TIME = gettimeofday();
67 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> BED format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
70 # http://genome.ucsc.edu/goldenPath/help/hgTracksHelp.html#BED
73 sub bed_entry_get_array
75 # Martin A. Hansen, September 2008.
77 # Reads a BED entry given a filehandle.
79 # This is a new _faster_ BED entry parser that
80 # uses arrays and not hashrefs.
82 # IMPORTANT! This function does not correct the
83 # BED_END position that is kept the way UCSC
86 my ( $fh, # file handle
87 $cols, # columns to read - OPTIONAL (3,4,5,6, or 12)
96 $line =~ tr/\n\r//d; # some people have carriage returns in their BED files -> Grrrr
98 return if not defined $line;
100 if ( not defined $cols ) {
101 $cols = 1 + $line =~ tr/\t//;
104 @entry = split "\t", $line, $cols + 1;
106 pop @entry if scalar @entry > $cols;
108 return wantarray ? @entry : \@entry;
114 # Martin A. Hansen, December 2007.
116 # Reads a bed entry given a filehandle.
118 my ( $fh, # file handle
119 $columns, # number of BED columns to read - OPTIONAL
124 my ( $line, @fields, %entry );
128 $line =~ tr/\n\r//d; # some people have carriage returns in their BED files -> Grrrr
130 return if not defined $line;
132 @fields = split "\t", $line;
134 $columns ||= scalar @fields;
139 "CHR" => $fields[ 0 ],
140 "CHR_BEG" => $fields[ 1 ],
141 "CHR_END" => $fields[ 2 ] - 1,
144 elsif ( $columns == 4 )
147 "CHR" => $fields[ 0 ],
148 "CHR_BEG" => $fields[ 1 ],
149 "CHR_END" => $fields[ 2 ] - 1,
150 "Q_ID" => $fields[ 3 ],
153 elsif ( $columns == 5 )
156 "CHR" => $fields[ 0 ],
157 "CHR_BEG" => $fields[ 1 ],
158 "CHR_END" => $fields[ 2 ] - 1,
159 "Q_ID" => $fields[ 3 ],
160 "SCORE" => $fields[ 4 ],
163 elsif ( $columns == 6 )
166 "CHR" => $fields[ 0 ],
167 "CHR_BEG" => $fields[ 1 ],
168 "CHR_END" => $fields[ 2 ] - 1,
169 "Q_ID" => $fields[ 3 ],
170 "SCORE" => $fields[ 4 ],
171 "STRAND" => $fields[ 5 ],
174 elsif ( $columns == 12 )
177 "CHR" => $fields[ 0 ],
178 "CHR_BEG" => $fields[ 1 ],
179 "CHR_END" => $fields[ 2 ] - 1,
180 "Q_ID" => $fields[ 3 ],
181 "SCORE" => $fields[ 4 ],
182 "STRAND" => $fields[ 5 ],
183 "THICK_BEG" => $fields[ 6 ],
184 "THICK_END" => $fields[ 7 ] - 1,
185 "ITEMRGB" => $fields[ 8 ],
186 "BLOCKCOUNT" => $fields[ 9 ],
187 "BLOCKSIZES" => $fields[ 10 ],
188 "Q_BEGS" => $fields[ 11 ],
193 Maasha::Common::error( qq(Bad BED format in line->$line<-) );
196 $entry{ "REC_TYPE" } = "BED";
197 $entry{ "BED_LEN" } = $entry{ "CHR_END" } - $entry{ "CHR_BEG" } + 1;
198 $entry{ "BED_COLS" } = $columns;
200 return wantarray ? %entry : \%entry;
206 # Martin A. Hansen, January 2008.
208 # Given a path to a BED file, read in all entries
211 my ( $path, # full path to BED file
212 $columns, # number of columns in BED file - OPTIONAL (but is faster)
217 my ( $fh, $entry, @list );
219 $fh = Maasha::Common::read_open( $path );
221 while ( $entry = bed_get_entry( $fh ) ) {
227 return wantarray ? @list : \@list;
231 sub bed_entry_put_array
233 # Martin A. Hansen, Septermber 2008.
235 # Writes a BED entry array to file.
237 # IMPORTANT! This function does not correct the
238 # BED_END position that is assumed to be in the
239 # UCSC positions scheme.
242 $fh, # file handle - OPTIONAL
243 $cols, # number of columns in BED file - OPTIONAL
248 $fh = \*STDOUT if not defined $fh;
250 if ( defined $cols ) {
251 print $fh join( "\t", @{ $record }[ 0 .. $cols - 1 ] ), "\n";
253 print $fh join( "\t", @{ $record } ), "\n";
260 # Martin A. Hansen, Septermber 2007.
262 # Writes a BED entry to file.
264 # NB, this could really be more robust!?
266 my ( $record, # hashref
267 $fh, # file handle - OPTIONAL
268 $columns, # number of columns in BED file - OPTIONAL (but is faster)
275 $columns ||= 12; # max number of columns possible
279 push @fields, $record->{ "CHR" };
280 push @fields, $record->{ "CHR_BEG" };
281 push @fields, $record->{ "CHR_END" } + 1;
283 elsif ( $columns == 4 )
285 $record->{ "Q_ID" } =~ s/\s+/_/g;
287 push @fields, $record->{ "CHR" };
288 push @fields, $record->{ "CHR_BEG" };
289 push @fields, $record->{ "CHR_END" } + 1;
290 push @fields, $record->{ "Q_ID" };
292 elsif ( $columns == 5 )
294 $record->{ "Q_ID" } =~ s/\s+/_/g;
295 $record->{ "SCORE" } =~ s/\.\d*//;
297 push @fields, $record->{ "CHR" };
298 push @fields, $record->{ "CHR_BEG" };
299 push @fields, $record->{ "CHR_END" } + 1;
300 push @fields, $record->{ "Q_ID" };
301 push @fields, $record->{ "SCORE" };
303 elsif ( $columns == 6 )
305 $record->{ "Q_ID" } =~ s/\s+/_/g;
306 $record->{ "SCORE" } =~ s/\.\d*//;
308 push @fields, $record->{ "CHR" };
309 push @fields, $record->{ "CHR_BEG" };
310 push @fields, $record->{ "CHR_END" } + 1;
311 push @fields, $record->{ "Q_ID" };
312 push @fields, $record->{ "SCORE" };
313 push @fields, $record->{ "STRAND" };
317 $record->{ "Q_ID" } =~ s/\s+/_/g;
318 $record->{ "SCORE" } =~ s/\.\d*//;
320 push @fields, $record->{ "CHR" };
321 push @fields, $record->{ "CHR_BEG" };
322 push @fields, $record->{ "CHR_END" } + 1;
323 push @fields, $record->{ "Q_ID" };
324 push @fields, $record->{ "SCORE" };
325 push @fields, $record->{ "STRAND" };
326 push @fields, $record->{ "THICK_BEG" } if defined $record->{ "THICK_BEG" };
327 push @fields, $record->{ "THICK_END" } + 1 if defined $record->{ "THICK_END" };
328 push @fields, $record->{ "ITEMRGB" } if defined $record->{ "ITEMRGB" };
329 push @fields, $record->{ "BLOCKCOUNT" } if defined $record->{ "BLOCKCOUNT" };
330 push @fields, $record->{ "BLOCKSIZES" } if defined $record->{ "BLOCKSIZES" };
331 push @fields, $record->{ "Q_BEGS" } if defined $record->{ "Q_BEGS" };
335 print $fh join( "\t", @fields ), "\n";
337 print join( "\t", @fields ), "\n";
344 # Martin A. Hansen, January 2008.
346 # Write a list of BED entries.
348 my ( $entries, # list of entries,
349 $fh, # file handle - OPTIONAL
354 map { bed_put_entry( $_, $fh ) } @{ $entries };
360 # Martin A. Hansen, March 2008.
362 # Given a bed record, analysis this to give information
363 # about intron/exon sizes.
365 my ( $entry, # BED entry
370 my ( $i, @begs, @lens, $exon_max, $exon_min, $exon_len, $exon_tot, $intron_max, $intron_min, $intron_len, $intron_tot );
373 $exon_min = 9999999999;
375 $intron_min = 9999999999;
377 $entry->{ "EXONS" } = $entry->{ "BLOCKCOUNT" };
379 @begs = split /,/, $entry->{ "Q_BEGS" };
380 @lens = split /,/, $entry->{ "BLOCKSIZES" };
382 for ( $i = 0; $i < $entry->{ "BLOCKCOUNT" }; $i++ )
384 $exon_len = @lens[ $i ];
386 $entry->{ "EXON_LEN_$i" } = $exon_len;
388 $exon_max = $exon_len if $exon_len > $exon_max;
389 $exon_min = $exon_len if $exon_len < $exon_min;
391 $exon_tot += $exon_len;
394 $entry->{ "EXON_LEN_-1" } = $exon_len;
395 $entry->{ "EXON_MAX_LEN" } = $exon_max;
396 $entry->{ "EXON_MIN_LEN" } = $exon_min;
397 $entry->{ "EXON_MEAN_LEN" } = int( $exon_tot / $entry->{ "EXONS" } );
399 $entry->{ "INTRONS" } = $entry->{ "BLOCKCOUNT" } - 1;
400 $entry->{ "INTRONS" } = 0 if $entry->{ "INTRONS" } < 0;
402 if ( $entry->{ "INTRONS" } )
404 for ( $i = 1; $i < $entry->{ "BLOCKCOUNT" }; $i++ )
406 $intron_len = @begs[ $i ] - ( @begs[ $i - 1 ] + @lens[ $i - 1 ] );
408 $entry->{ "INTRON_LEN_" . ( $i - 1 ) } = $intron_len;
410 $intron_max = $intron_len if $intron_len > $intron_max;
411 $intron_min = $intron_len if $intron_len < $intron_min;
413 $intron_tot += $intron_len;
416 $entry->{ "INTRON_LEN_-1" } = $intron_len;
417 $entry->{ "INTRON_MAX_LEN" } = $intron_max;
418 $entry->{ "INTRON_MIN_LEN" } = $intron_min;
419 $entry->{ "INTRON_MEAN_LEN" } = int( $intron_tot / $entry->{ "INTRONS" } );
422 return wantarray ? %{ $entry } : $entry;
428 # Martin A. Hansen, Septermber 2008
430 # Sorts a BED file using the c program
431 # "bed_sort" specifing a sort mode:
433 # 1: chr AND chr_beg.
434 # 2: chr AND strand AND chr_beg.
436 # 4: strand AND chr_beg.
438 my ( $bed_file, # BED file to sort
439 $sort_mode, # See above.
440 $cols, # Number of columns in BED file
443 &Maasha::Common::run( "bed_sort", "--sort $sort_mode --cols $cols $bed_file" );
447 sub bed_split_to_files
449 # Martin A. Hansen, Septermber 2008
451 # Given a list of BED files, split these
452 # into temporary files based on the chromosome
453 # name. Returns a list of the temporary files.
455 my ( $bed_files, # list of BED files to split
456 $cols, # number of columns
457 $tmp_dir, # temporary directory
462 my ( $bed_file, $fh_in, $entry, $key, %fh_hash, @tmp_files );
464 foreach $bed_file ( @{ $bed_files } )
466 $fh_in = Maasha::Common::read_open( $bed_file );
468 while ( $entry = bed_entry_get_array( $fh_in, $cols ) )
470 $key = $entry->[ CHR ];
472 $fh_hash{ $key } = Maasha::Common::write_open( "$tmp_dir/$key.temp" ) if not exists $fh_hash{ $key };
474 bed_entry_put_array( $entry, $fh_hash{ $key } );
480 foreach $key ( sort keys %fh_hash )
482 push @tmp_files, "$tmp_dir/$key.temp";
484 close $fh_hash{ $key };
487 return wantarray ? @tmp_files : \@tmp_files;
491 sub bed_merge_entries
493 # Martin A. Hansen, February 2008.
495 # Merge a list of given BED entries in one big entry.
497 my ( $entries, # list of BED entries to be merged
502 my ( $i, @q_ids, @q_begs, @blocksizes, @new_q_begs, @new_blocksizes, %new_entry );
504 @{ $entries } = sort { $a->{ "CHR_BEG" } <=> $b->{ "CHR_BEG" } } @{ $entries };
506 for ( $i = 0; $i < @{ $entries }; $i++ )
508 Maasha::Common::error( qq(Attempted merge of BED entries from different chromosomes) ) if $entries->[ 0 ]->{ "CHR" } ne $entries->[ $i ]->{ "CHR" };
509 Maasha::Common::error( qq(Attempted merge of BED entries from different strands) ) if $entries->[ 0 ]->{ "STRAND" } ne $entries->[ $i ]->{ "STRAND" };
511 push @q_ids, $entries->[ $i ]->{ "Q_ID" } || sprintf( "ID%06d", $i );
513 if ( exists $entries->[ $i ]->{ "Q_BEGS" } )
515 @q_begs = split ",", $entries->[ $i ]->{ "Q_BEGS" };
516 @blocksizes = split ",", $entries->[ $i ]->{ "BLOCKSIZES" };
521 @blocksizes = $entries->[ $i ]->{ "CHR_END" } - $entries->[ $i ]->{ "CHR_BEG" } + 1;
524 map { $_ += $entries->[ $i ]->{ "CHR_BEG" } } @q_begs;
526 push @new_q_begs, @q_begs;
527 push @new_blocksizes, @blocksizes;
530 map { $_ -= $entries->[ 0 ]->{ "CHR_BEG" } } @new_q_begs;
533 CHR => $entries->[ 0 ]->{ "CHR" },
534 CHR_BEG => $entries->[ 0 ]->{ "CHR_BEG" },
535 CHR_END => $entries->[ -1 ]->{ "CHR_END" },
537 BED_LEN => $entries->[ -1 ]->{ "CHR_END" } - $entries->[ 0 ]->{ "CHR_BEG" } + 1,
539 Q_ID => join( ":", @q_ids ),
541 STRAND => $entries->[ 0 ]->{ "STRAND" } || "+",
542 THICK_BEG => $entries->[ 0 ]->{ "THICK_BEG" } || $entries->[ 0 ]->{ "CHR_BEG" },
543 THICK_END => $entries->[ -1 ]->{ "THICK_END" } || $entries->[ -1 ]->{ "CHR_END" },
545 BLOCKCOUNT => scalar @new_q_begs,
546 BLOCKSIZES => join( ",", @new_blocksizes ),
547 Q_BEGS => join( ",", @new_q_begs ),
550 return wantarray ? %new_entry : \%new_entry;
556 # Martin A. Hansen, February 2008.
558 # Splits a given BED entry into a list of blocks,
559 # which are returned. A list of 6 column BED entry is returned.
561 my ( $entry, # BED entry hashref
566 my ( @q_begs, @blocksizes, $block, @blocks, $i );
568 if ( exists $entry->{ "BLOCKCOUNT" } )
570 @q_begs = split ",", $entry->{ "Q_BEGS" };
571 @blocksizes = split ",", $entry->{ "BLOCKSIZES" };
573 for ( $i = 0; $i < @q_begs; $i++ )
577 $block->{ "CHR" } = $entry->{ "CHR" };
578 $block->{ "CHR_BEG" } = $entry->{ "CHR_BEG" } + $q_begs[ $i ];
579 $block->{ "CHR_END" } = $entry->{ "CHR_BEG" } + $q_begs[ $i ] + $blocksizes[ $i ] - 1;
580 $block->{ "Q_ID" } = $entry->{ "Q_ID" } . sprintf( "_%03d", $i );
581 $block->{ "SCORE" } = $entry->{ "SCORE" };
582 $block->{ "STRAND" } = $entry->{ "STRAND" };
583 $block->{ "BED_LEN" } = $block->{ "CHR_END" } - $block->{ "CHR_BEG" } + 1,
584 $block->{ "BED_COLS" } = 6;
585 $block->{ "REC_TYPE" } = "BED";
587 push @blocks, $block;
592 @blocks = @{ $entry };
595 return wantarray ? @blocks : \@blocks;
602 # Martin A. Hansen, February 2008.
604 # Checks if two BED entries overlap and
605 # return 1 if so - else 0;
607 my ( $entry1, # hashref
609 $no_strand, # don't check strand flag - OPTIONAL
614 return 0 if $entry1->{ "CHR" } ne $entry2->{ "CHR" };
615 return 0 if $entry1->{ "STRAND" } ne $entry2->{ "STRAND" };
617 if ( $entry1->{ "CHR_END" } < $entry2->{ "CHR_BEG" } or $entry1->{ "CHR_BEG" } > $entry2->{ "CHR_END" } ) {
625 sub bed_upload_to_ucsc
627 # Martin A. Hansen, September 2007.
629 # Upload a BED file to the UCSC database.
631 my ( $tmp_dir, # temporary directory
632 $file, # file to upload,
633 $options, # argument hashref
634 $append, # flag indicating table should be appended
639 my ( $args, $table, $sql_file, $fh_out, $fh_in );
642 $args = join " ", $options->{ "database" }, $options->{ "table" }, "-tmpDir=$tmp_dir", "-oldTable", $file;
644 $args = join " ", $options->{ "database" }, $options->{ "table" }, "-tmpDir=$tmp_dir", $file;
647 if ( $options->{ "table" } =~ /rnaSecStr/ )
649 $table = $options->{ "table" };
651 print qq(uploading secondary structure table:"$table"\n) if $options->{ "verbose" };
653 $sql_file = "$tmp_dir/upload_RNA_SS.sql";
655 $fh_out = Maasha::Common::write_open( $sql_file );
658 CREATE TABLE $table (
659 bin smallint not null, # Bin number for browser speedup
660 chrom varchar(255) not null, # Chromosome or FPC contig
661 chromStart int unsigned not null, # Start position in chromosome
662 chromEnd int unsigned not null, # End position in chromosome
663 name varchar(255) not null, # Name of item
664 score int unsigned not null, # Score from 0-1000
665 strand char(1) not null, # + or -
666 size int unsigned not null, # Size of element.
667 secStr longblob not null, # Parentheses and '.'s which define the secondary structure
668 conf longblob not null, # Confidence of secondary-structure annotation per position (0.0-1.0).
671 INDEX(chrom(8), bin),
672 INDEX(chrom(8), chromStart)
678 Maasha::Common::run( "hgLoadBed", "-notItemRgb -sqlTable=$sql_file $options->{ 'database' } $options->{ 'table' } -tmpDir=$tmp_dir $file > /dev/null 2>&1" );
684 Maasha::Common::run( "hgLoadBed", "$args > /dev/null 2>&1" );
689 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PSL format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
693 # Martin A. Hansen, November 2008
695 # Converts PSL record keys to Biopiece record keys.
706 BLOCKSIZES => $psl->{ 'blockSize' },
707 SNUMINSERT => $psl->{ 'tNumInsert' },
708 Q_END => $psl->{ 'qEnd' },
709 SBASEINSERT => $psl->{ 'tBaseInsert' },
710 S_END => $psl->{ 'tEnd' },
711 QBASEINSERT => $psl->{ 'qBaseInsert' },
712 REPMATCHES => $psl->{ 'repMatches' },
713 QNUMINSERT => $psl->{ 'qNumInsert' },
714 MISMATCHES => $psl->{ 'misMatches' },
715 BLOCKCOUNT => $psl->{ 'blockCount' },
716 Q_LEN => $psl->{ 'qSize' },
717 S_ID => $psl->{ 'tName' },
718 STRAND => $psl->{ 'strand' },
719 Q_ID => $psl->{ 'qName' },
720 MATCHES => $psl->{ 'matches' },
721 S_LEN => $psl->{ 'tSize' },
722 NCOUNT => $psl->{ 'nCount' },
723 Q_BEGS => $psl->{ 'qStarts' },
724 S_BEGS => $psl->{ 'tStarts' },
725 S_BEG => $psl->{ 'tStart' },
726 Q_BEG => $psl->{ 'qStart ' },
729 $record{ "SCORE" } = $record{ "MATCHES" } + int( $record{ "REPMATCHES" } / 2 ) - $record{ "MISMATCHES" } - $record{ "QNUMINSERT" } - $record{ "SNUMINSERT" };
730 $record{ "SPAN" } = $record{ "Q_END" } - $record{ "Q_BEG" };
732 return wantarray ? %record : \%record;
738 # Martin A. Hansen, August 2008.
740 # Reads PSL next entry from a PSL file and returns a record.
742 my ( $fh, # file handle of PSL filefull path to PSL file
747 my ( $line, @fields, %record );
749 while ( $line = <$fh> )
753 @fields = split "\t", $line;
755 if ( scalar @fields == 21 )
759 MATCHES => $fields[ 0 ],
760 MISMATCHES => $fields[ 1 ],
761 REPMATCHES => $fields[ 2 ],
762 NCOUNT => $fields[ 3 ],
763 QNUMINSERT => $fields[ 4 ],
764 QBASEINSERT => $fields[ 5 ],
765 SNUMINSERT => $fields[ 6 ],
766 SBASEINSERT => $fields[ 7 ],
767 STRAND => $fields[ 8 ],
768 Q_ID => $fields[ 9 ],
769 Q_LEN => $fields[ 10 ],
770 Q_BEG => $fields[ 11 ],
771 Q_END => $fields[ 12 ] - 1,
772 S_ID => $fields[ 13 ],
773 S_LEN => $fields[ 14 ],
774 S_BEG => $fields[ 15 ],
775 S_END => $fields[ 16 ] - 1,
776 BLOCKCOUNT => $fields[ 17 ],
777 BLOCKSIZES => $fields[ 18 ],
778 Q_BEGS => $fields[ 19 ],
779 S_BEGS => $fields[ 20 ],
782 $record{ "SCORE" } = $record{ "MATCHES" } + int( $record{ "REPMATCHES" } / 2 ) - $record{ "MISMATCHES" } - $record{ "QNUMINSERT" } - $record{ "SNUMINSERT" };
783 $record{ "SPAN" } = $fields[ 12 ] - $fields[ 11 ];
785 return wantarray ? %record : \%record;
795 # Martin A. Hansen, February 2008.
797 # Reads PSL entries and returns a list of records.
799 my ( $path, # full path to PSL file
804 my ( $fh, @lines, @fields, $i, %record, @records );
806 $fh = Maasha::Common::read_open( $path );
814 for ( $i = 5; $i < @lines; $i++ )
816 @fields = split "\t", $lines[ $i ];
818 Maasha::Common::error( qq(Bad PSL format in file "$path") ) if not @fields == 21;
824 MATCHES => $fields[ 0 ],
825 MISMATCHES => $fields[ 1 ],
826 REPMATCHES => $fields[ 2 ],
827 NCOUNT => $fields[ 3 ],
828 QNUMINSERT => $fields[ 4 ],
829 QBASEINSERT => $fields[ 5 ],
830 SNUMINSERT => $fields[ 6 ],
831 SBASEINSERT => $fields[ 7 ],
832 STRAND => $fields[ 8 ],
833 Q_ID => $fields[ 9 ],
834 Q_LEN => $fields[ 10 ],
835 Q_BEG => $fields[ 11 ],
836 Q_END => $fields[ 12 ] - 1,
837 S_ID => $fields[ 13 ],
838 S_LEN => $fields[ 14 ],
839 S_BEG => $fields[ 15 ],
840 S_END => $fields[ 16 ] - 1,
841 BLOCKCOUNT => $fields[ 17 ],
842 BLOCKSIZES => $fields[ 18 ],
843 Q_BEGS => $fields[ 19 ],
844 S_BEGS => $fields[ 20 ],
847 $record{ "SCORE" } = $record{ "MATCHES" } + int( $record{ "REPMATCHES" } / 2 ) - $record{ "MISMATCHES" } - $record{ "QNUMINSERT" } - $record{ "SNUMINSERT" };
849 push @records, { %record };
852 return wantarray ? @records : \@records;
858 # Martin A. Hansen, September 2007.
860 # Write a PSL header to file.
862 my ( $fh, # file handle - OPTIONAL
867 $fh = \*STDOUT if not $fh;
869 print $fh qq(psLayout version 3
870 match mis- rep. N's Q gap Q gap T gap T gap strand Q Q Q Q T T T T block blockSizes qStart match match count bases count bases name size start end name size start end count
871 ---------------------------------------------------------------------------------------------------------------------------------------------------------------
878 # Martin A. Hansen, September 2007.
880 # Write a PSL entry to file.
882 my ( $record, # hashref
883 $fh, # file handle - OPTIONAL
888 $fh = \*STDOUT if not $fh;
892 push @output, $record->{ "MATCHES" };
893 push @output, $record->{ "MISMATCHES" };
894 push @output, $record->{ "REPMATCHES" };
895 push @output, $record->{ "NCOUNT" };
896 push @output, $record->{ "QNUMINSERT" };
897 push @output, $record->{ "QBASEINSERT" };
898 push @output, $record->{ "SNUMINSERT" };
899 push @output, $record->{ "SBASEINSERT" };
900 push @output, $record->{ "STRAND" };
901 push @output, $record->{ "Q_ID" };
902 push @output, $record->{ "Q_LEN" };
903 push @output, $record->{ "Q_BEG" };
904 push @output, $record->{ "Q_END" } + 1;
905 push @output, $record->{ "S_ID" };
906 push @output, $record->{ "S_LEN" };
907 push @output, $record->{ "S_BEG" };
908 push @output, $record->{ "S_END" } + 1;
909 push @output, $record->{ "BLOCKCOUNT" };
910 push @output, $record->{ "BLOCKSIZES" };
911 push @output, $record->{ "Q_BEGS" };
912 push @output, $record->{ "S_BEGS" };
914 print $fh join( "\t", @output ), "\n";
918 sub psl_upload_to_ucsc
920 # Martin A. Hansen, September 2007.
922 # Upload a PSL file to the UCSC database.
924 my ( $file, # file to upload,
925 $options, # argument hashref
926 $append, # flag indicating table should be appended
934 $args = join " ", $options->{ "database" }, "-table=$options->{ 'table' }", "-clientLoad", "-append", $file;
936 $args = join " ", $options->{ "database" }, "-table=$options->{ 'table' }", "-clientLoad", $file;
939 Maasha::Common::run( "hgLoadPsl", "$args > /dev/null 2>&1" );
943 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> TRACK FILE <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
946 sub ucsc_config_get_entry
948 # Martin A. Hansen, November 2008.
950 # Given a filehandle to a UCSC Genome Browser
951 # config file (.ra) get the next entry and
952 # return as a hash. Entries are separated by blank
953 # lines. # lines are skipped unless it is the lines:
957 my ( $fh, # file hanlde
962 my ( $line, %record );
964 while ( $line = <$fh> )
966 if ( $line =~ /Track added by 'upload_to_ucsc' (\S+) (\S+)/) {
967 $record{ 'date' } = $1;
968 $record{ 'time' } = $2;
969 } elsif ( $line =~ /^# date: (.+)/ ) {
970 $record{ 'date' } = $1;
971 } elsif ( $line =~ /^# time: (.+)/ ) {
972 $record{ 'time' } = $1;
973 } elsif ( $line =~ /^# (?:database:|Database) (.+)/ ) {
974 $record{ 'database' } = $1;
975 } elsif ( $line =~ /^#/ ) {
977 } elsif ( $line =~ /(\S+)\s+(.+)/ ) {
981 last if $line =~ /^$/ and exists $record{ "track" };
984 return undef if not exists $record{ "track" };
986 return wantarray ? %record : \%record;
990 sub ucsc_config_put_entry
992 # Martin A. Hansen, November 2008.
994 # Outputs a Biopiece record (a hashref)
995 # to a filehandle or STDOUT.
997 my ( $record, # hashref
998 $fh_out, # file handle
1003 my ( $date, $time );
1005 $fh_out ||= \*STDOUT;
1007 ( $date, $time ) = split " ", Maasha::Common::time_stamp();
1009 $record->{ "date" } ||= $date;
1010 $record->{ "time" } ||= $time;
1012 print $fh_out "\n# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n";
1014 map { print $fh_out "# $_: $record->{ $_ }\n" if exists $record->{ $_ } } qw( date time database );
1015 map { print $fh_out "$_ $record->{ $_ }\n" if exists $record->{ $_ } } qw( track
1035 sub update_my_tracks
1037 # Martin A. Hansen, September 2007.
1039 # Update the /home/user/ucsc/my_tracks.ra file and executes makeCustomTracks.pl
1041 my ( $options, # hashref
1047 my ( $file, $fh_in, $fh_out, $line, $time );
1049 $file = $ENV{ "HOME" } . "/ucsc/my_tracks.ra";
1051 # ---- create a backup ----
1053 $fh_in = Maasha::Common::read_open( $file );
1054 $fh_out = Maasha::Common::write_open( "$file~" );
1056 while ( $line = <$fh_in> ) {
1057 print $fh_out $line;
1063 # ---- append track ----
1065 $time = Maasha::Common::time_stamp();
1067 $fh_out = Maasha::Common::append_open( $file );
1069 if ( $type eq "sec_struct" )
1071 print $fh_out "\n\n# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n";
1073 print $fh_out "\n# Track added by 'upload_to_ucsc' $time\n\n";
1075 print $fh_out "# Database $options->{ 'database' }\n\n";
1077 print $fh_out "track $options->{ 'table' }\n";
1078 print $fh_out "shortLabel $options->{ 'short_label' }\n";
1079 print $fh_out "longLabel $options->{ 'long_label' }\n";
1080 print $fh_out "group $options->{ 'group' }\n";
1081 print $fh_out "priority $options->{ 'priority' }\n";
1082 print $fh_out "visibility $options->{ 'visibility' }\n";
1083 print $fh_out "color $options->{ 'color' }\n";
1084 print $fh_out "type bed 6 +\n";
1085 print $fh_out "mafTrack multiz17way\n";
1087 print $fh_out "\n# //\n";
1091 print $fh_out "\n\n# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n";
1093 print $fh_out "\n# Track added by 'upload_to_ucsc' $time\n\n";
1095 print $fh_out "# Database $options->{ 'database' }\n\n";
1097 print $fh_out "track $options->{ 'table' }\n";
1098 print $fh_out "shortLabel $options->{ 'short_label' }\n";
1099 print $fh_out "longLabel $options->{ 'long_label' }\n";
1100 print $fh_out "group $options->{ 'group' }\n";
1101 print $fh_out "priority $options->{ 'priority' }\n";
1102 print $fh_out "useScore 1\n" if $options->{ 'use_score' };
1103 print $fh_out "visibility $options->{ 'visibility' }\n";
1104 print $fh_out "maxHeightPixels 50:50:11\n" if $type eq "wig 0";
1105 print $fh_out "color $options->{ 'color' }\n";
1106 print $fh_out "type $type\n";
1108 print $fh_out "\n# //\n";
1113 Maasha::Common::run( "ucscMakeTracks.pl", "-b > /dev/null 2>&1" );
1117 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> fixedStep format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
1120 sub fixedstep_get_entry
1122 # Martin A. Hansen, December 2007.
1124 # Given a file handle to a PhastCons file get the
1125 # next entry which is all the lines after a "fixedStep"
1126 # line and until the next "fixedStep" line or EOF.
1128 my ( $fh, # filehandle
1131 # Returns a list of lines
1133 my ( $entry, @lines );
1135 local $/ = "\nfixedStep ";
1141 @lines = split "\n", $entry;
1143 return if @lines == 0;
1145 $lines[ 0 ] =~ s/fixedStep?\s*//;
1147 return wantarray ? @lines : \@lines;
1151 sub fixedstep_index_create
1153 # Martin A. Hansen, January 2008.
1155 # Indexes a concatenated fixedStep file.
1156 # The index consists of a hash with chromosomes as keys,
1157 # and a list of [ chr_beg, next_chr_beg, chr_end, index_beg, index_len ] as values.
1159 my ( $path, # path to fixedStep file
1164 my ( $fh, $pos, $index_beg, $index_len, $entry, $locator, $chr, $step, $beg, $end, $len, %index, $i );
1166 $fh = Maasha::Common::read_open( $path );
1170 while ( $entry = Maasha::UCSC::fixedstep_get_entry( $fh ) )
1172 $locator = shift @{ $entry };
1174 if ( $locator =~ /chrom=([^ ]+) start=(\d+) step=(\d+)/ )
1177 $beg = $2 - 1; # fixedStep files are 1-based
1182 Maasha::Common::error( qq(Could not parse locator: $locator) );
1185 $pos += length( $locator ) + 11;
1189 # map { $pos += length( $_ ) + 1 } @{ $entry };
1191 $pos += 6 * scalar @{ $entry };
1193 $index_len = $pos - $index_beg;
1195 push @{ $index{ $chr } }, [ $beg, undef, $beg + scalar @{ $entry } - 1, $index_beg, $index_len ];
1200 foreach $chr ( keys %index )
1202 for ( $i = 0; $i < @{ $index{ $chr } } - 1; $i++ ) {
1203 $index{ $chr }->[ $i ]->[ FS_NEXT_CHR_BEG ] = $index{ $chr }->[ $i + 1 ]->[ 0 ];
1206 $index{ $chr }->[ -1 ]->[ FS_NEXT_CHR_BEG ] = $index{ $chr }->[ -1 ]->[ FS_CHR_END ] + 1;
1209 return wantarray ? %index : \%index;
1213 sub fixedstep_index_store
1215 # Martin A. Hansen, January 2008.
1217 # Writes a fixedStep index to binary file.
1219 my ( $path, # full path to file
1220 $index, # list with index
1225 Maasha::Common::file_store( $path, $index );
1229 sub fixedstep_index_retrieve
1231 # Martin A. Hansen, January 2008.
1233 # Retrieves a fixedStep index from binary file.
1235 my ( $path, # full path to file
1242 $index = Maasha::Common::file_retrieve( $path );
1244 return wantarray ? %{ $index } : $index;
1248 sub fixedstep_index_lookup
1250 # Martin A. Hansen, January 2008.
1252 # Retrieve fixedStep scores from a indexed
1253 # fixedStep file given a chromosome and
1254 # begin and end positions.
1256 my ( $index, # data structure
1257 $fh, # filehandle to datafile
1259 $chr_beg, # chromosome beg
1260 $chr_end, # chromosome end
1261 $flank, # include flanking region - OPTIONAL
1266 my ( $index_beg, $index_end, $i, $c, $beg, $end, @vals, $scores );
1273 # print "chr_beg->$chr_beg chr_end->$chr_end flank->$flank\n";
1275 if ( exists $index->{ $chr } )
1277 $index_beg = Maasha::Matrix::interval_search( $index->{ $chr }, 0, 1, $chr_beg );
1279 if ( $index_beg < 0 ) {
1280 Maasha::Common::error( qq(Index search failed - begin index position doesn't exists: $chr_beg) );
1283 if ( $chr_end < $index->{ $chr }->[ $index_beg ]->[ 1 ] )
1285 $index_end = $index_beg;
1289 $index_end = Maasha::Matrix::interval_search( $index->{ $chr }, 0, 1, $chr_end );
1291 if ( $index_end < 0 ) {
1292 Maasha::Common::error( qq(Index search failed - end index position doesn't exists: $chr_end) );
1296 map { $scores->[ $_ ] = 0 } 0 .. $chr_end - $chr_beg;
1298 if ( $index_beg == $index_end )
1300 $beg = Maasha::Calc::max( $chr_beg, $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] );
1301 $end = Maasha::Calc::min( $chr_end, $index->{ $chr }->[ $index_end ]->[ FS_CHR_END ] );
1303 if ( $beg <= $index->{ $chr }->[ $index_beg ]->[ FS_CHR_END ] and $end >= $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] )
1305 @vals = split "\n", Maasha::Common::file_read(
1307 $index->{ $chr }->[ $index_beg ]->[ FS_INDEX_BEG ] + 6 * ( $beg - $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] ),
1308 6 * ( $end - $beg + 1 ),
1312 for ( $c = 0; $c < @vals; $c++ ) {
1313 $scores->[ $c + $beg - $chr_beg ] = $vals[ $c ];
1318 $beg = Maasha::Calc::max( $chr_beg, $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] );
1320 # print Dumper( $beg, $index->{ $chr }->[ $index_beg ] );
1321 # print Dumper( "next", $index->{ $chr }->[ $index_beg ]->[ FS_NEXT_CHR_BEG ] );
1327 if ( $beg <= $index->{ $chr }->[ $index_beg ]->[ FS_CHR_END ] )
1329 @vals = split "\n", Maasha::Common::file_read(
1331 $index->{ $chr }->[ $index_beg ]->[ FS_INDEX_BEG ] + 6 * ( $beg - $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] ),
1332 6 * ( $index->{ $chr }->[ $index_beg ]->[ FS_CHR_END ] - $beg + 1 ),
1335 for ( $c = 0; $c < @vals; $c++ ) {
1336 $scores->[ $c + $beg - $chr_beg ] = $vals[ $c ];
1340 $end = Maasha::Calc::min( $chr_end, $index->{ $chr }->[ $index_end ]->[ FS_CHR_END ] );
1342 if ( $end <= $index->{ $chr }->[ $index_end ]->[ FS_CHR_END ] )
1344 @vals = split "\n", Maasha::Common::file_read(
1346 $index->{ $chr }->[ $index_end ]->[ FS_INDEX_BEG ],
1347 6 * ( $end - $index->{ $chr }->[ $index_end ]->[ FS_CHR_BEG ] + 1 ),
1350 for ( $c = 0; $c < @vals; $c++ ) {
1351 $scores->[ $c + $index->{ $chr }->[ $index_end ]->[ FS_CHR_BEG ] - $chr_beg ] = $vals[ $c ];
1355 for ( $i = $index_beg + 1; $i <= $index_end - 1; $i++ )
1357 @vals = split "\n", Maasha::Common::file_read(
1359 $index->{ $chr }->[ $i ]->[ FS_INDEX_BEG ],
1360 6 * ( $index->{ $chr }->[ $i ]->[ FS_CHR_END ] - $index->{ $chr }->[ $i ]->[ FS_CHR_BEG ] + 1 ),
1363 for ( $c = 0; $c < @vals; $c++ ) {
1364 $scores->[ $c + $index->{ $chr }->[ $i ]->[ FS_CHR_BEG ] - $chr_beg ] = $vals[ $c ];
1371 Maasha::Common::error( qq(Chromosome "$chr" was not found in index) );
1374 return wantarray ? @{ $scores } : $scores;
1378 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PhastCons format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
1383 # Martin A. Hansen, July 2008
1385 # Create a fixedStep index for PhastCons data.
1387 my ( $file, # file to index
1388 $dir, # dir with file
1395 $index = fixedstep_index_create( "$dir/$file" );
1397 fixedstep_index_store( "$dir/$file.index", $index );
1401 sub phastcons_parse_entry
1403 # Martin A. Hansen, December 2007.
1405 # Given a PhastCons entry converts this to a
1406 # list of super blocks.
1408 my ( $lines, # list of lines
1409 $args, # argument hash
1414 my ( $info, $chr, $beg, $step, $i, $c, $j, @blocks, @super_blocks, @entries, $super_block, $block, @lens, @begs );
1416 $info = shift @{ $lines };
1418 if ( $info =~ /^chrom=([^ ]+) start=(\d+) step=(\d+)$/ )
1424 die qq(ERROR: step size $step != 1 -> problem!\n) if $step != 1; # in an ideal world should would be fixed ...
1429 while ( $i < @{ $lines } )
1431 if ( $lines->[ $i ] >= $args->{ "threshold" } )
1435 while ( $c < @{ $lines } )
1437 if ( $lines->[ $c ] < $args->{ "threshold" } )
1441 while ( $j < @{ $lines } and $lines->[ $j ] < $args->{ "threshold" } ) {
1445 if ( $j - $c > $args->{ "gap" } )
1447 if ( $c - $i >= $args->{ "min" } )
1451 CHR_BEG => $beg + $i - 1,
1452 CHR_END => $beg + $c - 2,
1470 if ( $c - $i >= $args->{ "min" } )
1474 CHR_BEG => $beg + $i - 1,
1475 CHR_END => $beg + $c - 2,
1490 while ( $i < @blocks )
1494 while ( $c < @blocks and $blocks[ $c ]->{ "CHR_BEG" } - $blocks[ $c - 1 ]->{ "CHR_END" } <= $args->{ "dist" } )
1499 push @super_blocks, [ @blocks[ $i .. $c - 1 ] ];
1504 foreach $super_block ( @super_blocks )
1506 foreach $block ( @{ $super_block } )
1508 push @begs, $block->{ "CHR_BEG" } - $super_block->[ 0 ]->{ "CHR_BEG" };
1509 push @lens, $block->{ "CHR_LEN" } - 1;
1515 CHR => $super_block->[ 0 ]->{ "CHR" },
1516 CHR_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
1517 CHR_END => $super_block->[ -1 ]->{ "CHR_END" },
1521 THICK_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
1522 THICK_END => $super_block->[ -1 ]->{ "CHR_END" } + 1,
1523 ITEMRGB => "0,200,100",
1524 BLOCKCOUNT => scalar @{ $super_block },
1525 BLOCKSIZES => join( ",", @lens ),
1526 Q_BEGS => join( ",", @begs ),
1533 return wantarray ? @entries : \@entries;
1537 sub phastcons_normalize
1539 # Martin A. Hansen, January 2008.
1541 # Normalizes a list of lists with PhastCons scores,
1542 # in such a way that each list contains the same number
1543 # or PhastCons scores.
1545 my ( $AoA, # AoA with PhastCons scores
1550 my ( $list, $max, $min, $mean, $diff );
1555 foreach $list ( @{ $AoA } )
1557 $min = scalar @{ $list } if scalar @{ $list } < $min;
1558 $max = scalar @{ $list } if scalar @{ $list } > $max;
1561 $mean = int( ( $min + $max ) / 2 );
1563 # print STDERR "min->$min max->$max mean->$mean\n";
1565 foreach $list ( @{ $AoA } )
1567 $diff = scalar @{ $list } - $mean;
1569 phastcons_list_inflate( $list, abs( $diff ) ) if $diff < 0;
1570 phastcons_list_deflate( $list, $diff ) if $diff > 0;
1573 return wantarray ? @{ $AoA } : $AoA;
1577 sub phastcons_list_inflate
1579 # Martin A. Hansen, January 2008.
1581 # Inflates a list with a given number of elements
1582 # in such a way that the extra elements are introduced
1583 # evenly over the entire length of the list. The value
1584 # of the extra elements is based on a mean of the
1585 # adjacent elements.
1587 my ( $list, # list of elements
1588 $diff, # number of elements to introduce
1593 my ( $len, $space, $i, $pos );
1595 $len = scalar @{ $list };
1597 $space = $len / $diff;
1599 for ( $i = 0; $i < $diff; $i++ )
1601 $pos = int( ( $space / 2 ) + $i * $space );
1603 splice @{ $list }, $pos, 0, ( $list->[ $pos - 1 ] + $list->[ $pos + 1 ] ) / 2;
1604 # splice @{ $list }, $pos, 0, "X";
1607 die qq(ERROR: bad inflate\n) if scalar @{ $list } != $len + $diff;
1611 sub phastcons_list_deflate
1613 # Martin A. Hansen, January 2008.
1615 # Deflates a list by removing a given number of elements
1616 # evenly distributed over the entire list.
1618 my ( $list, # list of elements
1619 $diff, # number of elements to remove
1624 my ( $len, $space, $i, $pos );
1626 $len = scalar @{ $list };
1628 $space = ( $len - $diff ) / $diff;
1630 for ( $i = 0; $i < $diff; $i++ )
1632 $pos = int( ( $space / 2 ) + $i * $space );
1634 splice @{ $list }, $pos, 1;
1637 die qq(ERROR: bad deflate\n) if scalar @{ $list } != $len - $diff;
1643 # Martin A. Hansen, January 2008.
1645 # Given a normalized PhastCons matrix in an AoA,
1646 # calculate the mean for each column and return as a list.
1648 my ( $AoA, # AoA with normalized PhastCons scores
1655 $AoA = Maasha::Matrix::matrix_flip( $AoA );
1657 map { push @list, Maasha::Calc::mean( $_ ) } @{ $AoA };
1659 return wantarray ? @list : \@list;
1663 sub phastcons_median
1665 # Martin A. Hansen, January 2008.
1667 # Given a normalized PhastCons matrix in an AoA,
1668 # calculate the median for each column and return as a list.
1670 my ( $AoA, # AoA with normalized PhastCons scores
1677 $AoA = Maasha::Matrix::matrix_flip( $AoA );
1679 map { push @list, Maasha::Calc::median( $_ ) } @{ $AoA };
1681 return wantarray ? @list : \@list;
1685 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> MULTIPLE ALIGNMENT FILES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
1690 # Martin A. Hansen, April 2008.
1692 # Executes mafFrag to extract a subalignment from a multiz track
1693 # in the UCSC genome browser database.
1695 my ( $tmp_dir, # temporary directory
1696 $database, # genome database
1697 $table, # table with the multiz track
1699 $beg, # begin position
1700 $end, # end position
1704 # Returns a list of record
1706 my ( $tmp_file, $align );
1708 $tmp_file = "$tmp_dir/maf_extract.maf";
1710 Maasha::Common::run( "mafFrag", "$database $table $chr $beg $end $strand $tmp_file" );
1712 $align = maf_parse( $tmp_file );
1716 return wantarray ? @{ $align } : $align;
1722 # Martin A. Hansen, April 2008.
1725 my ( $path, # full path to MAF file
1728 # Returns a list of record.
1730 my ( $fh, $line, @fields, @align );
1732 $fh = Maasha::Common::read_open( $path );
1734 while ( $line = <$fh> )
1738 if ( $line =~ /^s/ )
1740 @fields = split / /, $line;
1743 SEQ_NAME => $fields[ 1 ],
1744 SEQ => $fields[ -1 ],
1746 ALIGN_LEN => length $fields[ -1 ],
1753 return wantarray ? @align : \@align;
1757 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> WIGGLE FORMAT <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
1760 sub fixedstep_put_entry
1762 # Martin A. Hansen, April 2008.
1764 # Outputs a block of fixedStep values.
1765 # Used for outputting wiggle data.
1767 my ( $chr, # chromosome
1768 $beg, # start position
1769 $block, # list of scores
1770 $fh, # filehandle - OPTIONAL
1771 $log10, # flag indicating that log10 scores should be used
1776 $beg += 1; # fixedStep format is 1 based.
1780 print $fh "fixedStep chrom=$chr start=$beg step=1\n";
1783 map { printf( $fh "%f\n", Maasha::Calc::log10( $_ + 1 ) ) } @{ $block };
1785 map { printf( $fh "%d\n", ( $_ + 1 ) ) } @{ $block };
1790 sub wiggle_upload_to_ucsc
1792 # Martin A. Hansen, May 2008.
1794 # Upload a wiggle file to the UCSC database.
1796 my ( $tmp_dir, # temporary directory
1797 $wib_dir, # wib directory
1798 $wig_file, # file to upload,
1799 $options, # argument hashref
1806 # $args = join " ", "-tmpDir=$tmp_dir", "-pathPrefix=$wib_dir", $options->{ "database" }, $options->{ 'table' }, $wig_file;
1808 # Maasha::Common::run( "hgLoadWiggle", "$args > /dev/null 2>&1" );
1810 if ( $options->{ 'verbose' } ) {
1811 `cd $tmp_dir && hgLoadWiggle -tmpDir=$tmp_dir -pathPrefix=$wib_dir $options->{ 'database' } $options->{ 'table' } $wig_file`;
1813 `cd $tmp_dir && hgLoadWiggle -tmpDir=$tmp_dir -pathPrefix=$wib_dir $options->{ 'database' } $options->{ 'table' } $wig_file > /dev/null 2>&1`;
1818 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> MySQL CONF <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
1823 # Martin A. Hansen, May 2008
1825 # Fetches the MySQL database user name from the
1826 # .hg.conf file in the users home directory.
1830 my ( $fh, $line, $user );
1832 $fh = Maasha::Common::read_open( "$ENV{ 'HOME' }/.hg.conf" );
1834 while ( $line = <$fh> )
1838 if ( $line =~ /^db\.user=(.+)/ )
1852 sub ucsc_get_password
1854 # Martin A. Hansen, May 2008
1856 # Fetches the MySQL database password from the
1857 # .hg.conf file in the users home directory.
1861 my ( $fh, $line, $password );
1863 $fh = Maasha::Common::read_open( "$ENV{ 'HOME' }/.hg.conf" );
1865 while ( $line = <$fh> )
1869 if ( $line =~ /^db\.password=(.+)/ )
1883 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<