--- /dev/null
+#!/usr/bin/env perl
+
+# Copyright (C) 2007-2010 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Create a fixedstep index from fixedstep entries in the stream for use with [get_fixedstep].
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use warnings;
+use strict;
+use Maasha::Common;
+use Maasha::Biopieces;
+use Maasha::UCSC::Wiggle;
+use Maasha::Filesys;
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+my ( $default, $formats, $options, $in, $out, $record, $tmp_dir, $file_out, $fh_out, $entry );
+
+$options = Maasha::Biopieces::parse_options(
+ [
+ { long => 'no_stream', short => 'x', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
+ { long => 'directory', short => 'd', type => 'dir', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef },
+ { long => 'index_name', short => 'i', type => 'string', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef },
+ ]
+);
+
+Maasha::Common::error( qq(Directory already exists: "$options->{ 'directory' }") ) if -d $options->{ 'directory' };
+
+Maasha::Filesys::dir_create_if_not_exists( $options->{ 'directory' } );
+
+$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
+$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
+
+$file_out = "$options->{ 'directory' }/$options->{ 'index_name' }.wig";
+$fh_out = Maasha::Filesys::file_write_open( $file_out );
+
+while ( $record = Maasha::Biopieces::get_record( $in ) )
+{
+ if ( $entry = Maasha::UCSC::Wiggle::biopiece2fixedstep( $record ) ) {
+ Maasha::UCSC::Wiggle::fixedstep_entry_put( $entry, $fh_out );
+ }
+
+ Maasha::Biopieces::put_record( $record, $out ) if not $options->{ "no_stream" };
+}
+
+close $fh_out;
+
+Maasha::UCSC::Wiggle::fixedstep_index( $file_out, $options->{ 'directory' }, $options->{ 'index_name' } . ".index" );
+
+Maasha::Biopieces::close_stream( $in );
+Maasha::Biopieces::close_stream( $out );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+BEGIN
+{
+ Maasha::Biopieces::status_set();
+}
+
+
+END
+{
+ Maasha::Biopieces::status_log();
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
--- /dev/null
+#!/usr/bin/env perl
+
+# Copyright (C) 2007-2009 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Extract fixedstep scores from an indexed fixedstep file.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use warnings;
+use strict;
+use Data::Dumper;
+use Maasha::Biopieces;
+use Maasha::Filesys;
+use Maasha::Calc;
+use Maasha::UCSC::Wiggle;
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+my ( $options, $in, $out, $index, $fh, $record, $new_record, $subindex, $entry, $scores );
+
+$options = Maasha::Biopieces::parse_options(
+ [
+ { long => 'index', short => 'i', type => 'string', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef },
+ { long => 'chr', short => 'c', type => 'string', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
+ { long => 'beg', short => 'b', type => 'uint', mandatory => 'no', default => undef, allowed => undef, disallowed => 0 },
+ { long => 'end', short => 'e', type => 'uint', mandatory => 'no', default => undef, allowed => undef, disallowed => 0 },
+ ]
+);
+
+$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
+$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
+
+$index = Maasha::UCSC::Wiggle::fixedstep_index_retrieve( $options->{ 'index' } . ".index" );
+
+$fh = Maasha::Filesys::file_read_open( $options->{ 'index' } . ".wig" );
+
+if ( $options->{ 'chr' } and exists $index->{ $options->{ 'chr' } } )
+{
+ $subindex = Maasha::UCSC::Wiggle::fixedstep_index_lookup( $index, $options->{ 'chr' }, $options->{ 'beg' }, $options->{ 'end' } );
+
+ foreach $entry ( @{ $subindex } )
+ {
+ $scores = get_scores( $fh, $entry->{ 'INDEX_BEG' }, $entry->{ 'INDEX_LEN' } );
+
+ if ( $options->{ 'beg' } > $entry->{ 'CHR_BEG' } ) {
+ $scores = [ @{ $scores }[ ( $options->{ 'beg' } - $entry->{ 'CHR_BEG' } ) .. scalar @{ $scores } - 1 ] ];
+ }
+
+ if ( $options->{ 'end' } < $entry->{ 'CHR_END' } ) {
+ $scores = [ @{ $scores }[ 0 .. ( $options->{ 'end' } - $options->{ 'beg' } ) ] ];
+ }
+
+ $new_record->{ 'REC_TYPE' } = 'fixed_step';
+ $new_record->{ 'STEP' } = 1;
+ $new_record->{ 'CHR' } = $options->{ 'chr' };
+ $new_record->{ 'CHR_BEG' } = Maasha::Calc::max( $options->{ 'beg' }, $entry->{ 'CHR_BEG' } );
+ $new_record->{ 'VALS' } = join ";", @{ $scores };
+
+ Maasha::Biopieces::put_record( $new_record, $out );
+ }
+}
+
+
+while ( $record = Maasha::Biopieces::get_record( $in ) )
+{
+ if ( $record->{ 'CHR' } and $record->{ 'CHR_BEG' } and $record->{ 'CHR_END' } )
+ {
+ if ( exists $index->{ $record->{ 'CHR' } } )
+ {
+ $subindex = Maasha::UCSC::Wiggle::fixedstep_index_lookup( $index, $record->{ 'CHR' }, $record->{ 'CHR_BEG' }, $record->{ 'CHR_END' } );
+
+ foreach $entry ( @{ $subindex } )
+ {
+ $scores = get_scores( $fh, $entry->{ 'INDEX_BEG' }, $entry->{ 'INDEX_LEN' } );
+
+ if ( $record->{ 'CHR_BEG' } > $entry->{ 'CHR_BEG' } ) {
+ $scores = [ @{ $scores }[ ( $record->{ 'CHR_BEG' } - $entry->{ 'CHR_BEG' } ) .. scalar @{ $scores } - 1 ] ];
+ }
+
+ if ( $record->{ 'CHR_END' } < $entry->{ 'CHR_END' } ) {
+ $scores = [ @{ $scores }[ 0 .. ( $record->{ 'CHR_END' } - $record->{ 'CHR_BEG' } ) ] ];
+ }
+
+ $new_record->{ 'REC_TYPE' } = 'fixed_step';
+ $new_record->{ 'STEP' } = 1;
+ $new_record->{ 'CHR' } = $record->{ 'CHR' };
+ $new_record->{ 'CHR_BEG' } = Maasha::Calc::max( $record->{ 'CHR_BEG' }, $entry->{ 'CHR_BEG' } );
+ $new_record->{ 'VALS' } = join ";", @{ $scores };
+
+ Maasha::Biopieces::put_record( $new_record, $out );
+ }
+ }
+ }
+}
+
+close $fh;
+
+Maasha::Biopieces::close_stream( $in );
+Maasha::Biopieces::close_stream( $out );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub get_scores
+{
+ # Martin A. Hansen, June 2010.
+
+ # Get scores from a fixedstep file based on index
+ # offset and length.
+
+ my ( $fh, # filehandle to fixedstep file
+ $offset, # file offset
+ $len, # length
+ ) = @_;
+
+ # Returns a list.
+
+ my ( $block, @scores );
+
+ $block = Maasha::Filesys::file_read( $fh, $offset, $len );
+
+ @scores = split "\n", $block;
+
+ return wantarray ? @scores : \@scores;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+BEGIN
+{
+ Maasha::Biopieces::status_set();
+}
+
+
+END
+{
+ Maasha::Biopieces::status_log();
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
use Data::Dumper;
use Maasha::Common;
use Maasha::Filesys;
+use Maasha::Matrix;
use Maasha::Calc;
use vars qw( @ISA @EXPORT_OK );
blockCount => 9,
blockSizes => 10,
blockStarts => 11,
-
- FS_CHR_BEG => 0,
- FS_NEXT_CHR_BEG => 1,
- FS_CHR_END => 2,
- FS_INDEX_BEG => 3,
- FS_INDEX_LEN => 4,
};
}
+sub fixedstep_index
+{
+ # Martin A. Hansen, June 2010.
+
+ # Indexes a specified fixedstep file and saves the index
+ # to a specified directory and index name.
+
+ my ( $path, # fixedstep file to index
+ $dir, # direcotory to place index
+ $name, # name of index
+ ) = @_;
+
+ # Returns nothing.
+
+ my ( $index );
+
+ $index = fixedstep_index_create( $path );
+ fixedstep_index_store( "$dir/$name", $index );
+}
+
+
sub fixedstep_index_create
{
- # Martin A. Hansen, January 2008.
+ # Martin A. Hansen, June 2010.
- # Indexes a concatenated fixedStep file.
+ # Indexes a specified fixedStep file.
# The index consists of a hash with chromosomes as keys,
- # and a list of [ chr_beg, next_chr_beg, chr_end, index_beg, index_len ] as values.
+ # and a list of { chr_beg, chr_end, index_beg, index_len }
+ # hashes as values.
my ( $path, # path to fixedStep file
) = @_;
# Returns a hashref
- my ( $fh, $pos, $index_beg, $index_len, $entry, $locator, $chr, $step, $beg, $end, $len, %index, $i );
+ my ( $fh, $pos, $line, $index_beg, $index_len, $chr, $step, $chr_beg, $chr_end, $len, %index );
$fh = Maasha::Filesys::file_read_open( $path );
- $pos = 0;
+ $index_beg = 0;
+ $index_len = 0;
+ $chr_beg = 0;
+ $chr_end = 0;
+ $pos = 0;
- while ( $entry = Maasha::UCSC::fixedstep_get_entry( $fh ) )
+ while ( $line = <$fh> )
{
- $locator = shift @{ $entry };
-
- if ( $locator =~ /chrom=([^ ]+) start=(\d+) step=(\d+)/ )
+ if ( $line =~ /^fixedStep chrom=([^ ]+) start=(\d+) step=(\d+)/ )
{
- $chr = $1;
- $beg = $2 - 1; # fixedStep files are 1-based
- $step = $3;
+ if ( $chr_end > $chr_beg )
+ {
+ $index_len = $pos - $index_beg;
+
+ push @{ $index{ $chr } }, {
+ CHR_BEG => $chr_beg,
+ CHR_END => $chr_end - 1,
+ INDEX_BEG => $index_beg,
+ INDEX_LEN => $index_len,
+ };
+ }
+
+ $chr = $1;
+ $chr_beg = $2 - 1; # fixedStep files are 1-based
+ $step = $3;
+ $chr_end = $chr_beg;
+
+ $index_beg = $pos + length $line;
}
else
{
- Maasha::Common::error( qq(Could not parse locator: $locator) );
+ $chr_end++;
}
- $pos += length( $locator ) + 11;
-
- $index_beg = $pos;
-
-# map { $pos += length( $_ ) + 1 } @{ $entry };
-
- $pos += 6 * scalar @{ $entry };
-
- $index_len = $pos - $index_beg;
-
- push @{ $index{ $chr } }, [ $beg, undef, $beg + scalar @{ $entry } - 1, $index_beg, $index_len ];
+ $pos += length $line;
}
- close $fh;
+ $index_len = $pos - $index_beg;
- foreach $chr ( keys %index )
- {
- for ( $i = 0; $i < @{ $index{ $chr } } - 1; $i++ ) {
- $index{ $chr }->[ $i ]->[ FS_NEXT_CHR_BEG ] = $index{ $chr }->[ $i + 1 ]->[ 0 ];
- }
+ push @{ $index{ $chr } }, {
+ CHR_BEG => $chr_beg,
+ CHR_END => $chr_end - 1,
+ INDEX_BEG => $index_beg,
+ INDEX_LEN => $index_len,
+ };
- $index{ $chr }->[ -1 ]->[ FS_NEXT_CHR_BEG ] = $index{ $chr }->[ -1 ]->[ FS_CHR_END ] + 1;
- }
+ close $fh;
return wantarray ? %index : \%index;
}
{
# Martin A. Hansen, January 2008.
- # Retrieve fixedStep scores from a indexed
- # fixedStep file given a chromosome and
- # begin and end positions.
+ # Locate fixedStep entries from index based on
+ # chr, chr_beg and chr_end.
my ( $index, # data structure
- $fh, # filehandle to datafile
$chr, # chromosome
$chr_beg, # chromosome beg
$chr_end, # chromosome end
- $flank, # include flanking region - OPTIONAL
) = @_;
# Returns a list
+
+ my ( @subindex );
- my ( $index_beg, $index_end, $i, $c, $beg, $end, @vals, $scores );
-
- $flank ||= 0;
-
- $chr_beg -= $flank;
- $chr_end += $flank;
-
- # print "chr_beg->$chr_beg chr_end->$chr_end flank->$flank\n";
-
- if ( exists $index->{ $chr } )
- {
- $index_beg = Maasha::Matrix::interval_search( $index->{ $chr }, 0, 1, $chr_beg );
-
- if ( $index_beg < 0 ) {
- Maasha::Common::error( qq(Index search failed - begin index position doesn't exists: $chr_beg) );
- }
-
- if ( $chr_end < $index->{ $chr }->[ $index_beg ]->[ 1 ] )
- {
- $index_end = $index_beg;
- }
- else
- {
- $index_end = Maasha::Matrix::interval_search( $index->{ $chr }, 0, 1, $chr_end );
-
- if ( $index_end < 0 ) {
- Maasha::Common::error( qq(Index search failed - end index position doesn't exists: $chr_end) );
- }
- }
-
- map { $scores->[ $_ ] = 0 } 0 .. $chr_end - $chr_beg;
-
- if ( $index_beg == $index_end )
- {
- $beg = Maasha::Calc::max( $chr_beg, $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] );
- $end = Maasha::Calc::min( $chr_end, $index->{ $chr }->[ $index_end ]->[ FS_CHR_END ] );
-
- if ( $beg <= $index->{ $chr }->[ $index_beg ]->[ FS_CHR_END ] and $end >= $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] )
- {
- @vals = split "\n", Maasha::Filesys::file_read(
- $fh,
- $index->{ $chr }->[ $index_beg ]->[ FS_INDEX_BEG ] + 6 * ( $beg - $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] ),
- 6 * ( $end - $beg + 1 ),
- );
- }
-
- for ( $c = 0; $c < @vals; $c++ ) {
- $scores->[ $c + $beg - $chr_beg ] = $vals[ $c ];
- }
- }
- else
- {
- $beg = Maasha::Calc::max( $chr_beg, $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] );
-
-# print Dumper( $beg, $index->{ $chr }->[ $index_beg ] );
-# print Dumper( "next", $index->{ $chr }->[ $index_beg ]->[ FS_NEXT_CHR_BEG ] );
-
- # beg next
- # v v
- # |||||||||.......
-
- if ( $beg <= $index->{ $chr }->[ $index_beg ]->[ FS_CHR_END ] )
- {
- @vals = split "\n", Maasha::Filesys::file_read(
- $fh,
- $index->{ $chr }->[ $index_beg ]->[ FS_INDEX_BEG ] + 6 * ( $beg - $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] ),
- 6 * ( $index->{ $chr }->[ $index_beg ]->[ FS_CHR_END ] - $beg + 1 ),
- );
-
- for ( $c = 0; $c < @vals; $c++ ) {
- $scores->[ $c + $beg - $chr_beg ] = $vals[ $c ];
- }
- }
-
- $end = Maasha::Calc::min( $chr_end, $index->{ $chr }->[ $index_end ]->[ FS_CHR_END ] );
-
- if ( $end <= $index->{ $chr }->[ $index_end ]->[ FS_CHR_END ] )
- {
- @vals = split "\n", Maasha::Filesys::file_read(
- $fh,
- $index->{ $chr }->[ $index_end ]->[ FS_INDEX_BEG ],
- 6 * ( $end - $index->{ $chr }->[ $index_end ]->[ FS_CHR_BEG ] + 1 ),
- );
-
- for ( $c = 0; $c < @vals; $c++ ) {
- $scores->[ $c + $index->{ $chr }->[ $index_end ]->[ FS_CHR_BEG ] - $chr_beg ] = $vals[ $c ];
- }
- }
-
- for ( $i = $index_beg + 1; $i <= $index_end - 1; $i++ )
- {
- @vals = split "\n", Maasha::Filesys::file_read(
- $fh,
- $index->{ $chr }->[ $i ]->[ FS_INDEX_BEG ],
- 6 * ( $index->{ $chr }->[ $i ]->[ FS_CHR_END ] - $index->{ $chr }->[ $i ]->[ FS_CHR_BEG ] + 1 ),
- );
-
- for ( $c = 0; $c < @vals; $c++ ) {
- $scores->[ $c + $index->{ $chr }->[ $i ]->[ FS_CHR_BEG ] - $chr_beg ] = $vals[ $c ];
- }
- }
- }
- }
- else
- {
- Maasha::Common::error( qq(Chromosome "$chr" was not found in index) );
+ if ( exists $index->{ $chr } ) {
+ @subindex = grep { $_->{ 'CHR_END' } >= $chr_beg and $_->{ 'CHR_BEG' } <= $chr_end } @{ $index->{ $chr } };
+ } else {
+ Maasha::Common::error( qq(Chromosome "$chr" not found in index) );
}
- return wantarray ? @{ $scores } : $scores;
+ return wantarray ? @subindex : \@subindex;
}