$line = <$fh>;
- $line =~ s/(\n|\r)$//g; # some people have carriage returns in their BED files -> Grrrr
+ $line =~ tr/\n\r//d; # some people have carriage returns in their BED files -> Grrrr
return if not defined $line;
}
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PhastCons format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-sub phastcons_parse_entry
-{
- # Martin A. Hansen, December 2007.
-
- # Given a PhastCons entry converts this to a
- # list of super blocks.
-
- my ( $lines, # list of lines
- $args, # argument hash
- ) = @_;
-
- # Returns
-
- my ( $info, $chr, $beg, $step, $i, $c, $j, @blocks, @super_blocks, @entries, $super_block, $block, @lens, @begs );
-
- $info = shift @{ $lines };
-
- if ( $info =~ /^chrom=([^ ]+) start=(\d+) step=(\d+)$/ )
- {
- $chr = $1;
- $beg = $2;
- $step = $3;
-
- die qq(ERROR: step size $step != 1 -> problem!\n) if $step != 1; # in an ideal world should would be fixed ...
- }
-
- $i = 0;
-
- while ( $i < @{ $lines } )
- {
- if ( $lines->[ $i ] >= $args->{ "threshold" } )
- {
- $c = $i + 1;
-
- while ( $c < @{ $lines } )
- {
- if ( $lines->[ $c ] < $args->{ "threshold" } )
- {
- $j = $c + 1;
-
- while ( $j < @{ $lines } and $lines->[ $j ] < $args->{ "threshold" } ) {
- $j++;
- }
-
- if ( $j - $c > $args->{ "gap" } )
- {
- if ( $c - $i >= $args->{ "min" } )
- {
- push @blocks, {
- CHR => $chr,
- CHR_BEG => $beg + $i - 1,
- CHR_END => $beg + $c - 2,
- CHR_LEN => $c - $i,
- };
- }
-
- $i = $j;
-
- last;
- }
-
- $c = $j
- }
- else
- {
- $c++;
- }
- }
-
- if ( $c - $i >= $args->{ "min" } )
- {
- push @blocks, {
- CHR => $chr,
- CHR_BEG => $beg + $i - 1,
- CHR_END => $beg + $c - 2,
- CHR_LEN => $c - $i,
- };
- }
-
- $i = $c;
- }
- else
- {
- $i++;
- }
- }
-
- $i = 0;
-
- while ( $i < @blocks )
- {
- $c = $i + 1;
-
- while ( $c < @blocks and $blocks[ $c ]->{ "CHR_BEG" } - $blocks[ $c - 1 ]->{ "CHR_END" } <= $args->{ "dist" } )
- {
- $c++;
- }
-
- push @super_blocks, [ @blocks[ $i .. $c - 1 ] ];
-
- $i = $c;
- }
-
- foreach $super_block ( @super_blocks )
- {
- foreach $block ( @{ $super_block } )
- {
- push @begs, $block->{ "CHR_BEG" } - $super_block->[ 0 ]->{ "CHR_BEG" };
- push @lens, $block->{ "CHR_LEN" } - 1;
- }
-
- $lens[ -1 ]++;
-
- push @entries, {
- CHR => $super_block->[ 0 ]->{ "CHR" },
- CHR_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
- CHR_END => $super_block->[ -1 ]->{ "CHR_END" },
- Q_ID => "Q_ID",
- SCORE => 100,
- STRAND => "+",
- THICK_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
- THICK_END => $super_block->[ -1 ]->{ "CHR_END" } + 1,
- ITEMRGB => "0,200,100",
- BLOCKCOUNT => scalar @{ $super_block },
- BLOCKSIZES => join( ",", @lens ),
- Q_BEGS => join( ",", @begs ),
- };
-
- undef @begs;
- undef @lens;
- }
-
- return wantarray ? @entries : \@entries;
-}
-
-
-sub phastcons_index_create
+sub fixedstep_index_create
{
# Martin A. Hansen, January 2008.
- # Indexes a concatenated PhastCons file.
+ # Indexes a concatenated fixedStep file.
# The index consists of a hash with chromosomes as keys,
# and a list of [ chr_beg, next_chr_beg, chr_end, index_beg, index_len ] as values.
- my ( $path, # path to PhastCons file
+ my ( $path, # path to fixedStep file
) = @_;
# Returns a hashref
if ( $locator =~ /chrom=([^ ]+) start=(\d+) step=(\d+)/ )
{
$chr = $1;
- $beg = $2 - 1; # phastcons files are 1-based
+ $beg = $2 - 1; # fixedStep files are 1-based
$step = $3;
}
else
{
- Maasha::Common::error( qq(Could not parse PhastCons locator: $locator) );
+ Maasha::Common::error( qq(Could not parse locator: $locator) );
}
$pos += length( $locator ) + 11;
}
-sub phastcons_index_store
+sub fixedstep_index_store
{
# Martin A. Hansen, January 2008.
- # Writes a PhastCons index to binary file.
+ # Writes a fixedStep index to binary file.
my ( $path, # full path to file
$index, # list with index
}
-sub phastcons_index_retrieve
+sub fixedstep_index_retrieve
{
# Martin A. Hansen, January 2008.
- # Retrieves a PhastCons index from binary file.
+ # Retrieves a fixedStep index from binary file.
my ( $path, # full path to file
) = @_;
}
-sub phastcons_index_lookup
+sub fixedStep_index_lookup
{
# Martin A. Hansen, January 2008.
- # Retrieve PhastCons scores from a indexed
- # Phastcons file given a chromosome and
+ # Retrieve fixedStep scores from a indexed
+ # fixedStep file given a chromosome and
# begin and end positions.
my ( $index, # data structure
}
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PhastCons format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub phastcons_index
+{
+ # Martin A. Hansen, July 2008
+
+ # Create a fixedStep index for PhastCons data.
+
+ my ( $file, # file to index
+ $dir, # dir with file
+ ) = @_;
+
+ # Returns nothing.
+
+ my ( $index );
+
+ $index = fixedstep_index_create( "$dir/$file" );
+
+ fixedstep_index_store( "$dir/$file.index", $index );
+}
+
+
+sub phastcons_parse_entry
+{
+ # Martin A. Hansen, December 2007.
+
+ # Given a PhastCons entry converts this to a
+ # list of super blocks.
+
+ my ( $lines, # list of lines
+ $args, # argument hash
+ ) = @_;
+
+ # Returns
+
+ my ( $info, $chr, $beg, $step, $i, $c, $j, @blocks, @super_blocks, @entries, $super_block, $block, @lens, @begs );
+
+ $info = shift @{ $lines };
+
+ if ( $info =~ /^chrom=([^ ]+) start=(\d+) step=(\d+)$/ )
+ {
+ $chr = $1;
+ $beg = $2;
+ $step = $3;
+
+ die qq(ERROR: step size $step != 1 -> problem!\n) if $step != 1; # in an ideal world should would be fixed ...
+ }
+
+ $i = 0;
+
+ while ( $i < @{ $lines } )
+ {
+ if ( $lines->[ $i ] >= $args->{ "threshold" } )
+ {
+ $c = $i + 1;
+
+ while ( $c < @{ $lines } )
+ {
+ if ( $lines->[ $c ] < $args->{ "threshold" } )
+ {
+ $j = $c + 1;
+
+ while ( $j < @{ $lines } and $lines->[ $j ] < $args->{ "threshold" } ) {
+ $j++;
+ }
+
+ if ( $j - $c > $args->{ "gap" } )
+ {
+ if ( $c - $i >= $args->{ "min" } )
+ {
+ push @blocks, {
+ CHR => $chr,
+ CHR_BEG => $beg + $i - 1,
+ CHR_END => $beg + $c - 2,
+ CHR_LEN => $c - $i,
+ };
+ }
+
+ $i = $j;
+
+ last;
+ }
+
+ $c = $j
+ }
+ else
+ {
+ $c++;
+ }
+ }
+
+ if ( $c - $i >= $args->{ "min" } )
+ {
+ push @blocks, {
+ CHR => $chr,
+ CHR_BEG => $beg + $i - 1,
+ CHR_END => $beg + $c - 2,
+ CHR_LEN => $c - $i,
+ };
+ }
+
+ $i = $c;
+ }
+ else
+ {
+ $i++;
+ }
+ }
+
+ $i = 0;
+
+ while ( $i < @blocks )
+ {
+ $c = $i + 1;
+
+ while ( $c < @blocks and $blocks[ $c ]->{ "CHR_BEG" } - $blocks[ $c - 1 ]->{ "CHR_END" } <= $args->{ "dist" } )
+ {
+ $c++;
+ }
+
+ push @super_blocks, [ @blocks[ $i .. $c - 1 ] ];
+
+ $i = $c;
+ }
+
+ foreach $super_block ( @super_blocks )
+ {
+ foreach $block ( @{ $super_block } )
+ {
+ push @begs, $block->{ "CHR_BEG" } - $super_block->[ 0 ]->{ "CHR_BEG" };
+ push @lens, $block->{ "CHR_LEN" } - 1;
+ }
+
+ $lens[ -1 ]++;
+
+ push @entries, {
+ CHR => $super_block->[ 0 ]->{ "CHR" },
+ CHR_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
+ CHR_END => $super_block->[ -1 ]->{ "CHR_END" },
+ Q_ID => "Q_ID",
+ SCORE => 100,
+ STRAND => "+",
+ THICK_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
+ THICK_END => $super_block->[ -1 ]->{ "CHR_END" } + 1,
+ ITEMRGB => "0,200,100",
+ BLOCKCOUNT => scalar @{ $super_block },
+ BLOCKSIZES => join( ",", @lens ),
+ Q_BEGS => join( ",", @begs ),
+ };
+
+ undef @begs;
+ undef @lens;
+ }
+
+ return wantarray ? @entries : \@entries;
+}
+
+
sub phastcons_normalize
{
# Martin A. Hansen, January 2008.