package Maasha::UCSC::Wiggle;
-# Copyright (C) 2007-2008 Martin A. Hansen.
+# Copyright (C) 2007-2010 Martin A. Hansen.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
use Data::Dumper;
use Maasha::Common;
use Maasha::Filesys;
+use Maasha::Matrix;
use Maasha::Calc;
use vars qw( @ISA @EXPORT_OK );
use constant {
- BITS => 32, # Number of bits in an integer
- SEQ_MAX => 200_000_000, # Maximum sequence size
- chrom => 0, # BED field names
- chromStart => 1,
- chromEnd => 2,
- name => 3,
- score => 4,
- strand => 5,
- thickStart => 6,
- thickEnd => 7,
- itemRgb => 8,
- blockCount => 9,
- blockSizes => 10,
- blockStarts => 11,
+ BITS => 32, # Number of bits in an integer
+ SEQ_MAX => 200_000_000, # Maximum sequence size
+ chrom => 0, # BED field names
+ chromStart => 1,
+ chromEnd => 2,
+ name => 3,
+ score => 4,
+ strand => 5,
+ thickStart => 6,
+ thickEnd => 7,
+ itemRgb => 8,
+ blockCount => 9,
+ blockSizes => 10,
+ blockStarts => 11,
};
# Converts a Biopiece record to a fixedStep entry.
- my ( $record, # Biopiece record
+ my ( $record, # Biopiece record
+ $opt_key, # Key from option hash
+ $opt_step, # Step from option hash
) = @_;
# Returns a list
- my ( @entry, $beg, $vals );
+ my ( @entry, $chr, $beg, $step, $vals );
- if ( exists $record->{ "REC_TYPE" } and $record->{ "REC_TYPE" } eq 'fixed_step' )
+ $chr = $record->{ 'CHR' } || $record->{ 'S_ID' };
+ $beg = $record->{ 'CHR_BEG' } || $record->{ 'S_BEG' };
+ $step = $record->{ 'STEP' } || $opt_step;
+ $vals = $record->{ 'VALS' };
+ $vals = $record->{ $opt_key } if $opt_key;
+
+ if ( defined $chr and defined $beg and defined $step and defined $vals)
{
- if ( exists $record->{ 'CHR' } and exists $record->{ 'CHR_BEG' } and exists $record->{ 'STEP' } )
- {
- $beg = $record->{ 'CHR_BEG' } + 1; # fixedStep is 1-based
- push @entry, "fixedStep chrom=$record->{ 'CHR' } start=$beg step=$record->{ 'STEP' }";
+ $beg += 1; # fixedStep is 1-based
- $vals = $record->{ 'VALS' };
+ push @entry, "fixedStep chrom=$chr start=$beg step=$step";
- map { push @entry, $_ } split ";", $vals;
- }
- else
- {
- Maasha::Common::error( qq(could not convert Biopiece record to fixedStep) );
- }
+ map { push @entry, $_ } split ";", $vals;
}
- return wantarray ? @entry : \@entry;
+ unless ( @entry ) {
+ return
+ } else {
+ return wantarray ? @entry : \@entry;
+ }
}
}
+sub fixedstep_index
+{
+ # Martin A. Hansen, June 2010.
+
+ # Indexes a specified fixedstep file and saves the index
+ # to a specified directory and index name.
+
+ my ( $path, # fixedstep file to index
+ $dir, # direcotory to place index
+ $name, # name of index
+ ) = @_;
+
+ # Returns nothing.
+
+ my ( $index );
+
+ $index = fixedstep_index_create( $path );
+ fixedstep_index_store( "$dir/$name", $index );
+}
+
+
+sub fixedstep_index_create
+{
+ # Martin A. Hansen, June 2010.
+
+ # Indexes a specified fixedStep file.
+ # The index consists of a hash with chromosomes as keys,
+ # and a list of { chr_beg, chr_end, index_beg, index_len }
+ # hashes as values.
+
+ my ( $path, # path to fixedStep file
+ ) = @_;
+
+ # Returns a hashref
+
+ my ( $fh, $pos, $line, $index_beg, $index_len, $chr, $step, $chr_beg, $chr_end, $len, %index );
+
+ $fh = Maasha::Filesys::file_read_open( $path );
+
+ $index_beg = 0;
+ $index_len = 0;
+ $chr_beg = 0;
+ $chr_end = 0;
+ $pos = 0;
+
+ while ( $line = <$fh> )
+ {
+ if ( $line =~ /^fixedStep chrom=([^ ]+) start=(\d+) step=(\d+)/ )
+ {
+ if ( $chr_end > $chr_beg )
+ {
+ $index_len = $pos - $index_beg;
+
+ push @{ $index{ $chr } }, {
+ CHR_BEG => $chr_beg,
+ CHR_END => $chr_end - 1,
+ INDEX_BEG => $index_beg,
+ INDEX_LEN => $index_len,
+ };
+ }
+
+ $chr = $1;
+ $chr_beg = $2 - 1; # fixedStep files are 1-based
+ $step = $3;
+ $chr_end = $chr_beg;
+
+ $index_beg = $pos + length $line;
+ }
+ else
+ {
+ $chr_end++;
+ }
+
+ $pos += length $line;
+ }
+
+ $index_len = $pos - $index_beg;
+
+ push @{ $index{ $chr } }, {
+ CHR_BEG => $chr_beg,
+ CHR_END => $chr_end - 1,
+ INDEX_BEG => $index_beg,
+ INDEX_LEN => $index_len,
+ };
+
+ close $fh;
+
+ return wantarray ? %index : \%index;
+}
+
+
+sub fixedstep_index_store
+{
+ # Martin A. Hansen, January 2008.
+
+ # Writes a fixedStep index to binary file.
+
+ my ( $path, # full path to file
+ $index, # list with index
+ ) = @_;
+
+ # returns nothing
+
+ Maasha::Filesys::file_store( $path, $index );
+}
+
+
+sub fixedstep_index_retrieve
+{
+ # Martin A. Hansen, January 2008.
+
+ # Retrieves a fixedStep index from binary file.
+
+ my ( $path, # full path to file
+ ) = @_;
+
+ # returns list
+
+ my $index;
+
+ $index = Maasha::Filesys::file_retrieve( $path );
+
+ return wantarray ? %{ $index } : $index;
+}
+
+
+sub fixedstep_index_lookup
+{
+ # Martin A. Hansen, January 2008.
+
+ # Locate fixedStep entries from index based on
+ # chr, chr_beg and chr_end.
+
+ my ( $index, # data structure
+ $chr, # chromosome
+ $chr_beg, # chromosome beg
+ $chr_end, # chromosome end
+ ) = @_;
+
+ # Returns a list
+
+ my ( @subindex );
+
+ if ( exists $index->{ $chr } ) {
+ @subindex = grep { $_->{ 'CHR_END' } >= $chr_beg and $_->{ 'CHR_BEG' } <= $chr_end } @{ $index->{ $chr } };
+ } else {
+ Maasha::Common::error( qq(Chromosome "$chr" not found in index) );
+ }
+
+ return wantarray ? @subindex : \@subindex;
+}
+
+
+sub wiggle_upload_to_ucsc
+{
+ # Martin A. Hansen, May 2008.
+
+ # Upload a wiggle file to the UCSC database.
+
+ my ( $tmp_dir, # temporary directory
+ $wib_dir, # wib directory
+ $wig_file, # file to upload,
+ $options, # argument hashref
+ ) = @_;
+
+ # Returns nothing.
+
+ my ( $args );
+
+# $args = join " ", "-tmpDir=$tmp_dir", "-pathPrefix=$wib_dir", $options->{ "database" }, $options->{ 'table' }, $wig_file;
+
+# Maasha::Common::run( "hgLoadWiggle", "$args > /dev/null 2>&1" );
+
+ if ( $options->{ 'verbose' } ) {
+ `cd $tmp_dir && hgLoadWiggle -tmpDir=$tmp_dir -pathPrefix=$wib_dir $options->{ 'database' } $options->{ 'table' } $wig_file`;
+ } else {
+ `cd $tmp_dir && hgLoadWiggle -tmpDir=$tmp_dir -pathPrefix=$wib_dir $options->{ 'database' } $options->{ 'table' } $wig_file > /dev/null 2>&1`;
+ }
+}
+
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<