X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;ds=sidebyside;f=code_perl%2FMaasha%2FUCSC%2FBED.pm;h=6bfa590b364d486f8202ddc454793b24d5fbc21d;hb=43a3f6b695375536014eb9626dc1b18abbbe83bb;hp=f2a11e8c8ba95ce85745f6dc2a98b21f004dffa8;hpb=430634c816f774703a59c0122006daf70ee570f8;p=biopieces.git diff --git a/code_perl/Maasha/UCSC/BED.pm b/code_perl/Maasha/UCSC/BED.pm index f2a11e8..6bfa590 100644 --- a/code_perl/Maasha/UCSC/BED.pm +++ b/code_perl/Maasha/UCSC/BED.pm @@ -30,6 +30,7 @@ package Maasha::UCSC::BED; # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +use warnings; use strict; use Data::Dumper; @@ -74,9 +75,6 @@ use constant { }; -my $CHECK_ALL = 1; # Global flag indicating that BED input and output is checked thoroughly. - - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> BED format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @@ -123,7 +121,8 @@ sub bed_entry_get # Reads a BED entry given a filehandle. my ( $fh, # file handle - $cols, # columns to read - OPTIONAL (3,4,5,6,8,9 or 12) + $cols, # columns to read - OPTIONAL (3,4,5,6,8,9 or 12) + $check, # check integrity of BED values - OPTIONAL ) = @_; # Returns a list. @@ -132,10 +131,10 @@ sub bed_entry_get $line = <$fh>; - $line =~ tr/\n\r//d; # some people have carriage returns in their BED files -> Grrrr - return if not defined $line; + $line =~ tr/\n\r//d; # some people have carriage returns in their BED files -> Grrrr + if ( not defined $cols ) { $cols = 1 + $line =~ tr/\t//; } @@ -144,7 +143,7 @@ sub bed_entry_get pop @entry if scalar @entry > $cols; - bed_entry_check( \@entry ) if $CHECK_ALL; + bed_entry_check( \@entry ) if $check; return wantarray ? @entry : \@entry; } @@ -159,6 +158,7 @@ sub bed_entry_put my ( $entry, # list $fh, # file handle - OPTIONAL $cols, # number of columns in BED file - OPTIONAL (3,4,5,6,8,9 or 12) + $check, # check integrity of BED values - OPTIONAL ) = @_; # Returns nothing. @@ -167,7 +167,7 @@ sub bed_entry_put @{ $entry } = @{ $entry }[ 0 .. $cols - 1 ]; } - bed_entry_check( $entry ) if $CHECK_ALL; + bed_entry_check( $entry ) if $check; $fh = \*STDOUT if not defined $fh; @@ -216,29 +216,30 @@ sub bed_entry_check Maasha::Common::error( qq(Bad BED entry - chromEnd must be greater than chromStart - not "$bed->[ chromStart ] > $bed->[ chromEnd ]") ); } - return if @{ $bed } == 3; + return if $cols == 3; if ( $bed->[ name ] =~ /\s/ ) { Maasha::Common::error( qq(Bad BED entry - no white space allowed in name field: "$bed->[ name ]") ); } - return if @{ $bed } == 4; + return if $cols == 4; if ( $bed->[ score ] =~ /\D/ ) { Maasha::Common::error( qq(Bad BED entry - score must be a whole number - not "$bed->[ score ]") ); } - if ( $bed->[ score ] < 0 or $bed->[ score ] > 1000 ) { + # if ( $bed->[ score ] < 0 or $bed->[ score ] > 1000 ) { # disabled - too restrictive ! + if ( $bed->[ score ] < 0 ) { Maasha::Common::error( qq(Bad BED entry - score must be between 0 and 1000 - not "$bed->[ score ]") ); } - return if @{ $bed } == 5; + return if $cols == 5; if ( $bed->[ strand ] ne '+' and $bed->[ strand ] ne '-' ) { Maasha::Common::error( qq(Bad BED entry - strand must be + or - not "$bed->[ strand ]") ); } - return if @{ $bed } == 6; + return if $cols == 6; if ( $bed->[ thickStart ] =~ /\D/ ) { Maasha::Common::error( qq(Bad BED entry - thickStart must be a whole number - not "$bed->[ thickStart ]") ); @@ -268,13 +269,13 @@ sub bed_entry_check Maasha::Common::error( qq(Bad BED entry - thickEnd must be less than chromEnd - not "$bed->[ thickEnd ] > $bed->[ chromEnd ]") ); } - return if @{ $bed } == 8; + return if $cols == 8; if ( $bed->[ itemRgb ] !~ /^(0|\d{1,3},\d{1,3},\d{1,3},?)$/ ) { Maasha::Common::error( qq(Bad BED entry - itemRgb must be 0 or in the form of 255,0,0 - not "$bed->[ itemRgb ]") ); } - return if @{ $bed } == 9; + return if $cols == 9; if ( $bed->[ blockCount ] =~ /\D/ ) { Maasha::Common::error( qq(Bad BED entry - blockCount must be a whole number - not "$bed->[ blockCount ]") ); @@ -418,7 +419,7 @@ sub bed2biopiece $bp_record{ "CHR" } = $bed_entry->[ chrom ]; $bp_record{ "CHR_BEG" } = $bed_entry->[ chromStart ]; $bp_record{ "CHR_END" } = $bed_entry->[ chromEnd ] - 1; - $bp_record{ "BED_LEN" } = $bed_entry->[ chromEnd ] - $bed_entry->[ chromEnd ]; + $bp_record{ "BED_LEN" } = $bed_entry->[ chromEnd ] - $bed_entry->[ chromStart ]; return wantarray ? %bp_record : \%bp_record if $cols == 3; @@ -471,7 +472,7 @@ sub biopiece2bed # Returns arrayref. - my ( @bed_entry ); + my ( @bed_entry, @begs ); $cols ||= 12; # max number of columns possible @@ -479,13 +480,21 @@ sub biopiece2bed $bp_record->{ "S_ID" } || return undef; - $bed_entry[ chromStart ] = $bp_record->{ "CHR_BEG" } || - $bp_record->{ "S_BEG" } || - return undef; + if ( defined $bp_record->{ "CHR_BEG" } ) { + $bed_entry[ chromStart ] = $bp_record->{ "CHR_BEG" }; + } elsif ( defined $bp_record->{ "S_BEG" } ) { + $bed_entry[ chromStart ] = $bp_record->{ "S_BEG" }; + } else { + return undef; + } - $bed_entry[ chromEnd ] = $bp_record->{ "CHR_END" } || - $bp_record->{ "S_END" } || - return undef; + if ( defined $bp_record->{ "CHR_END" } ) { + $bed_entry[ chromEnd ] = $bp_record->{ "CHR_END" }; + } elsif ( defined $bp_record->{ "S_END" }) { + $bed_entry[ chromEnd ] = $bp_record->{ "S_END" }; + } else { + return undef; + } $bed_entry[ chromEnd ]++; @@ -515,27 +524,47 @@ sub biopiece2bed defined $bp_record->{ "THICK_END" } ) { $bed_entry[ thickStart ] = $bp_record->{ "THICK_BEG" }; - $bed_entry[ thickEnd ] = $bp_record->{ "THICK_END" }; + $bed_entry[ thickEnd ] = $bp_record->{ "THICK_END" } + 1; + } + elsif ( defined $bp_record->{ "BLOCK_COUNT" } ) + { + $bed_entry[ thickStart ] = $bed_entry[ chromStart ]; + $bed_entry[ thickEnd ] = $bed_entry[ chromEnd ]; } return wantarray ? @bed_entry : \@bed_entry if $cols == 8; - if ( exists $bp_record->{ "COLOR" } ) { + if ( defined $bp_record->{ "COLOR" } ) + { $bed_entry[ itemRgb ] = $bp_record->{ "COLOR" }; - } else { - return wantarray ? @bed_entry : \@bed_entry; + } + elsif ( defined $bp_record->{ "BLOCK_COUNT" } ) + { + $bed_entry[ itemRgb ] = 0; } return wantarray ? @bed_entry : \@bed_entry if $cols == 9; if ( defined $bp_record->{ "BLOCK_COUNT" } and + defined $bp_record->{ "BLOCK_LENS" } and + defined $bp_record->{ "S_BEGS" } ) + { + @begs = split ",", $bp_record->{ "S_BEGS" }; + map { $_ -= $bed_entry[ chromStart ] } @begs; + + $bed_entry[ blockCount ] = $bp_record->{ "BLOCK_COUNT" }; + $bed_entry[ blockSizes ] = $bp_record->{ "BLOCK_LENS" }; + $bed_entry[ blockStarts ] = join ",", @begs; + $bed_entry[ thickEnd ] = $bp_record->{ "THICK_END" } + 1; + } + elsif ( defined $bp_record->{ "BLOCK_COUNT" } and defined $bp_record->{ "BLOCK_LENS" } and defined $bp_record->{ "Q_BEGS" } ) { $bed_entry[ blockCount ] = $bp_record->{ "BLOCK_COUNT" }; $bed_entry[ blockSizes ] = $bp_record->{ "BLOCK_LENS" }; $bed_entry[ blockStarts ] = $bp_record->{ "Q_BEGS" }; - $bed_entry[ thickEnd ]++; + $bed_entry[ thickEnd ] = $bp_record->{ "THICK_END" } + 1; } return wantarray ? @bed_entry : \@bed_entry; @@ -553,6 +582,7 @@ sub tag_contigs_assemble my ( $bed_file, # path to BED file $chr, # chromosome + $strand, # strand $dir, # working directory ) = @_; @@ -580,6 +610,7 @@ sub tag_contigs_assemble $entry->[ chromEnd ] = $end; $entry->[ name ] = sprintf( "TC%06d", $id ); $entry->[ score ] = $score; + $entry->[ strand ] = $strand; bed_entry_put( $entry, $fh_out );