# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+use warnings;
use strict;
use Data::Dumper;
};
-my $CHECK_ALL = 1; # Global flag indicating that BED input and output is checked thoroughly.
-
-
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> BED format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
# Reads a BED entry given a filehandle.
my ( $fh, # file handle
- $cols, # columns to read - OPTIONAL (3,4,5,6,8,9 or 12)
+ $cols, # columns to read - OPTIONAL (3,4,5,6,8,9 or 12)
+ $check, # check integrity of BED values - OPTIONAL
) = @_;
# Returns a list.
$line = <$fh>;
- $line =~ tr/\n\r//d; # some people have carriage returns in their BED files -> Grrrr
-
return if not defined $line;
+ $line =~ tr/\n\r//d; # some people have carriage returns in their BED files -> Grrrr
+
if ( not defined $cols ) {
$cols = 1 + $line =~ tr/\t//;
}
pop @entry if scalar @entry > $cols;
- bed_entry_check( \@entry ) if $CHECK_ALL;
+ bed_entry_check( \@entry ) if $check;
return wantarray ? @entry : \@entry;
}
my ( $entry, # list
$fh, # file handle - OPTIONAL
$cols, # number of columns in BED file - OPTIONAL (3,4,5,6,8,9 or 12)
+ $check, # check integrity of BED values - OPTIONAL
) = @_;
# Returns nothing.
@{ $entry } = @{ $entry }[ 0 .. $cols - 1 ];
}
- bed_entry_check( $entry ) if $CHECK_ALL;
+ bed_entry_check( $entry ) if $check;
$fh = \*STDOUT if not defined $fh;
Maasha::Common::error( qq(Bad BED entry - chromEnd must be greater than chromStart - not "$bed->[ chromStart ] > $bed->[ chromEnd ]") );
}
- return if @{ $bed } == 3;
+ return if $cols == 3;
if ( $bed->[ name ] =~ /\s/ ) {
Maasha::Common::error( qq(Bad BED entry - no white space allowed in name field: "$bed->[ name ]") );
}
- return if @{ $bed } == 4;
+ return if $cols == 4;
if ( $bed->[ score ] =~ /\D/ ) {
Maasha::Common::error( qq(Bad BED entry - score must be a whole number - not "$bed->[ score ]") );
}
- if ( $bed->[ score ] < 0 or $bed->[ score ] > 1000 ) {
+ # if ( $bed->[ score ] < 0 or $bed->[ score ] > 1000 ) { # disabled - too restrictive !
+ if ( $bed->[ score ] < 0 ) {
Maasha::Common::error( qq(Bad BED entry - score must be between 0 and 1000 - not "$bed->[ score ]") );
}
- return if @{ $bed } == 5;
+ return if $cols == 5;
if ( $bed->[ strand ] ne '+' and $bed->[ strand ] ne '-' ) {
Maasha::Common::error( qq(Bad BED entry - strand must be + or - not "$bed->[ strand ]") );
}
- return if @{ $bed } == 6;
+ return if $cols == 6;
if ( $bed->[ thickStart ] =~ /\D/ ) {
Maasha::Common::error( qq(Bad BED entry - thickStart must be a whole number - not "$bed->[ thickStart ]") );
Maasha::Common::error( qq(Bad BED entry - thickEnd must be less than chromEnd - not "$bed->[ thickEnd ] > $bed->[ chromEnd ]") );
}
- return if @{ $bed } == 8;
+ return if $cols == 8;
if ( $bed->[ itemRgb ] !~ /^(0|\d{1,3},\d{1,3},\d{1,3},?)$/ ) {
Maasha::Common::error( qq(Bad BED entry - itemRgb must be 0 or in the form of 255,0,0 - not "$bed->[ itemRgb ]") );
}
- return if @{ $bed } == 9;
+ return if $cols == 9;
if ( $bed->[ blockCount ] =~ /\D/ ) {
Maasha::Common::error( qq(Bad BED entry - blockCount must be a whole number - not "$bed->[ blockCount ]") );
$bp_record{ "CHR" } = $bed_entry->[ chrom ];
$bp_record{ "CHR_BEG" } = $bed_entry->[ chromStart ];
$bp_record{ "CHR_END" } = $bed_entry->[ chromEnd ] - 1;
- $bp_record{ "BED_LEN" } = $bed_entry->[ chromEnd ] - $bed_entry->[ chromEnd ];
+ $bp_record{ "BED_LEN" } = $bed_entry->[ chromEnd ] - $bed_entry->[ chromStart ];
return wantarray ? %bp_record : \%bp_record if $cols == 3;
# Returns arrayref.
- my ( @bed_entry );
+ my ( @bed_entry, @begs );
$cols ||= 12; # max number of columns possible
$bp_record->{ "S_ID" } ||
return undef;
- $bed_entry[ chromStart ] = $bp_record->{ "CHR_BEG" } ||
- $bp_record->{ "S_BEG" } ||
- return undef;
+ if ( defined $bp_record->{ "CHR_BEG" } ) {
+ $bed_entry[ chromStart ] = $bp_record->{ "CHR_BEG" };
+ } elsif ( defined $bp_record->{ "S_BEG" } ) {
+ $bed_entry[ chromStart ] = $bp_record->{ "S_BEG" };
+ } else {
+ return undef;
+ }
- $bed_entry[ chromEnd ] = $bp_record->{ "CHR_END" } ||
- $bp_record->{ "S_END" } ||
- return undef;
+ if ( defined $bp_record->{ "CHR_END" } ) {
+ $bed_entry[ chromEnd ] = $bp_record->{ "CHR_END" };
+ } elsif ( defined $bp_record->{ "S_END" }) {
+ $bed_entry[ chromEnd ] = $bp_record->{ "S_END" };
+ } else {
+ return undef;
+ }
$bed_entry[ chromEnd ]++;
defined $bp_record->{ "THICK_END" } )
{
$bed_entry[ thickStart ] = $bp_record->{ "THICK_BEG" };
- $bed_entry[ thickEnd ] = $bp_record->{ "THICK_END" };
+ $bed_entry[ thickEnd ] = $bp_record->{ "THICK_END" } + 1;
+ }
+ elsif ( defined $bp_record->{ "BLOCK_COUNT" } )
+ {
+ $bed_entry[ thickStart ] = $bed_entry[ chromStart ];
+ $bed_entry[ thickEnd ] = $bed_entry[ chromEnd ];
}
return wantarray ? @bed_entry : \@bed_entry if $cols == 8;
- if ( exists $bp_record->{ "COLOR" } ) {
+ if ( defined $bp_record->{ "COLOR" } )
+ {
$bed_entry[ itemRgb ] = $bp_record->{ "COLOR" };
- } else {
- return wantarray ? @bed_entry : \@bed_entry;
+ }
+ elsif ( defined $bp_record->{ "BLOCK_COUNT" } )
+ {
+ $bed_entry[ itemRgb ] = 0;
}
return wantarray ? @bed_entry : \@bed_entry if $cols == 9;
if ( defined $bp_record->{ "BLOCK_COUNT" } and
+ defined $bp_record->{ "BLOCK_LENS" } and
+ defined $bp_record->{ "S_BEGS" } )
+ {
+ @begs = split ",", $bp_record->{ "S_BEGS" };
+ map { $_ -= $bed_entry[ chromStart ] } @begs;
+
+ $bed_entry[ blockCount ] = $bp_record->{ "BLOCK_COUNT" };
+ $bed_entry[ blockSizes ] = $bp_record->{ "BLOCK_LENS" };
+ $bed_entry[ blockStarts ] = join ",", @begs;
+ $bed_entry[ thickEnd ] = $bp_record->{ "THICK_END" } + 1;
+ }
+ elsif ( defined $bp_record->{ "BLOCK_COUNT" } and
defined $bp_record->{ "BLOCK_LENS" } and
defined $bp_record->{ "Q_BEGS" } )
{
$bed_entry[ blockCount ] = $bp_record->{ "BLOCK_COUNT" };
$bed_entry[ blockSizes ] = $bp_record->{ "BLOCK_LENS" };
$bed_entry[ blockStarts ] = $bp_record->{ "Q_BEGS" };
- $bed_entry[ thickEnd ]++;
+ $bed_entry[ thickEnd ] = $bp_record->{ "THICK_END" } + 1;
}
return wantarray ? @bed_entry : \@bed_entry;
my ( $bed_file, # path to BED file
$chr, # chromosome
+ $strand, # strand
$dir, # working directory
) = @_;
$entry->[ chromEnd ] = $end;
$entry->[ name ] = sprintf( "TC%06d", $id );
$entry->[ score ] = $score;
+ $entry->[ strand ] = $strand;
bed_entry_put( $entry, $fh_out );