#!/usr/bin/env perl

# Copyright (C) 2007-2009 Martin A. Hansen.

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# http://www.gnu.org/copyleft/gpl.html


# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

# Extract fixedstep scores from an indexed fixedstep file.

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


use warnings;
use strict;
use Data::Dumper;
use Maasha::Biopieces;
use Maasha::Filesys;
use Maasha::Calc;
use Maasha::UCSC::Wiggle;


# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


my ( $options, $in, $out, $index, $fh, $record );

$options = Maasha::Biopieces::parse_options(
    [
        { long => 'index', short => 'i', type => 'string', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef },
        { long => 'chr',   short => 'c', type => 'string', mandatory => 'no',  default => undef, allowed => undef, disallowed => undef },
        { long => 'beg',   short => 'b', type => 'uint',   mandatory => 'no',  default => undef, allowed => undef, disallowed => 0 },
        { long => 'end',   short => 'e', type => 'uint',   mandatory => 'no',  default => undef, allowed => undef, disallowed => 0 },
    ]
);

$in  = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );

# The --index argument is a common file prefix: <prefix>.index holds the
# index and <prefix>.wig holds the score data the index points into.
$index = Maasha::UCSC::Wiggle::fixedstep_index_retrieve( $options->{ 'index' } . ".index" );

$fh = Maasha::Filesys::file_read_open( $options->{ 'index' } . ".wig" );

# Region given on the command line - only acted upon if the chromosome
# is present in the index.
if ( $options->{ 'chr' } and exists $index->{ $options->{ 'chr' } } )
{
    put_scores( $index, $fh, $out, $options->{ 'chr' }, $options->{ 'beg' }, $options->{ 'end' } );
}

# Regions given as records in the input stream. Only records carrying
# CHR, CHR_BEG and CHR_END on an indexed chromosome produce output; the
# input records themselves are not emitted.
while ( $record = Maasha::Biopieces::get_record( $in ) )
{
    # Test definedness rather than truth: a CHR_BEG of 0 is falsy, but is
    # presumably a legitimate coordinate that must not be skipped.
    next if not defined $record->{ 'CHR' };
    next if not defined $record->{ 'CHR_BEG' };
    next if not defined $record->{ 'CHR_END' };
    next if not exists $index->{ $record->{ 'CHR' } };

    put_scores( $index, $fh, $out, $record->{ 'CHR' }, $record->{ 'CHR_BEG' }, $record->{ 'CHR_END' } );
}

close $fh;

Maasha::Biopieces::close_stream( $in );
Maasha::Biopieces::close_stream( $out );


# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


sub put_scores
{
    # Looks up all index entries for a region, reads the scores of each
    # entry from the fixedstep file, trims them to the region and emits
    # one fixed_step record per entry.

    my ( $index,   # index as returned by fixedstep_index_retrieve
         $fh,      # filehandle to fixedstep file
         $out,     # output stream
         $chr,     # chromosome of the region
         $beg,     # region begin - may be undef
         $end,     # region end   - may be undef
       ) = @_;

    # Returns nothing.

    my ( $subindex, $entry, $scores, $chr_beg, $new_record );

    $subindex = Maasha::UCSC::Wiggle::fixedstep_index_lookup( $index, $chr, $beg, $end );

    foreach $entry ( @{ $subindex } )
    {
        $scores = get_scores( $fh, $entry->{ 'INDEX_BEG' }, $entry->{ 'INDEX_LEN' } );

        ( $chr_beg, $scores ) = trim_scores( $scores, $entry->{ 'CHR_BEG' }, $entry->{ 'CHR_END' }, $beg, $end );

        # A fresh hash per record, so no state is shared between the
        # records handed to put_record.
        $new_record = {
            'REC_TYPE' => 'fixed_step',
            'STEP'     => 1,
            'CHR'      => $chr,
            'CHR_BEG'  => $chr_beg,
            'VALS'     => join( ";", @{ $scores } ),
        };

        Maasha::Biopieces::put_record( $new_record, $out );
    }
}


sub trim_scores
{
    # Trims the scores of one index entry to a requested region.
    # Assumes one score per position starting at the entry begin
    # (the emitted records use STEP 1) - confirm against the index format.

    my ( $scores,      # list of scores for the whole entry
         $entry_beg,   # begin position of the entry
         $entry_end,   # end position of the entry
         $beg,         # region begin - undef means no trimming at the front
         $end,         # region end   - undef means no trimming at the back
       ) = @_;

    # Returns a tuple: ( begin position of first kept score, list of kept scores ).

    my ( $lo, $hi, @kept );

    $lo = 0;
    $hi = $#{ $scores };

    if ( defined $beg and $beg > $entry_beg ) {
        $lo = $beg - $entry_beg;
    }

    # The upper index is relative to the entry begin, not the region begin:
    # when the region starts before the entry, nothing is cut from the
    # front, so using ( end - beg ) would keep too many scores.
    if ( defined $end and $end < $entry_end ) {
        $hi = $end - $entry_beg;
    }

    # Never slice past the scores actually read, which would yield undefs.
    $hi = $#{ $scores } if $hi > $#{ $scores };

    # An inverted range ( $lo > $hi ) is an empty list, so a region that
    # misses the entry altogether yields no scores.
    @kept = @{ $scores }[ $lo .. $hi ];

    return ( $entry_beg + $lo, \@kept );
}


sub get_scores
{
    # Martin A. Hansen, June 2010.

    # Get scores from a fixedstep file based on index
    # offset and length.

    my ( $fh,       # filehandle to fixedstep file
         $offset,   # file offset
         $len,      # length
       ) = @_;

    # Returns a list in list context, otherwise a reference to the list.

    my ( $block, @scores );

    $block = Maasha::Filesys::file_read( $fh, $offset, $len );

    # One score per line in the block read.
    @scores = split "\n", $block;

    return wantarray ? @scores : \@scores;
}


# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


BEGIN
{
    Maasha::Biopieces::status_set();
}


END
{
    Maasha::Biopieces::status_log();
}


# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


__END__