-my $TIME = gettimeofday();
-
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> BED format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-# http://genome.ucsc.edu/goldenPath/help/hgTracksHelp.html#BED
-
-
-sub bed_get_entry
-{
- # Martin A. Hansen, December 2007.
-
- # Reads a bed entry given a filehandle.
-
- my ( $fh, # file handle
- $columns, # number of BED columns to read - OPTIONAL
- ) = @_;
-
- # Returns hashref.
-
- my ( $line, @fields, %entry );
-
- $line = <$fh>;
-
- $line =~ tr/\n\r//d; # some people have carriage returns in their BED files -> Grrrr
-
- return if not defined $line;
-
- @fields = split "\t", $line;
-
- $columns ||= scalar @fields;
-
- if ( $columns == 3 )
- {
- %entry = (
- "CHR" => $fields[ 0 ],
- "CHR_BEG" => $fields[ 1 ],
- "CHR_END" => $fields[ 2 ] - 1,
- );
- }
- elsif ( $columns == 4 )
- {
- %entry = (
- "CHR" => $fields[ 0 ],
- "CHR_BEG" => $fields[ 1 ],
- "CHR_END" => $fields[ 2 ] - 1,
- "Q_ID" => $fields[ 3 ],
- );
- }
- elsif ( $columns == 5 )
- {
- %entry = (
- "CHR" => $fields[ 0 ],
- "CHR_BEG" => $fields[ 1 ],
- "CHR_END" => $fields[ 2 ] - 1,
- "Q_ID" => $fields[ 3 ],
- "SCORE" => $fields[ 4 ],
- );
- }
- elsif ( $columns == 6 )
- {
- %entry = (
- "CHR" => $fields[ 0 ],
- "CHR_BEG" => $fields[ 1 ],
- "CHR_END" => $fields[ 2 ] - 1,
- "Q_ID" => $fields[ 3 ],
- "SCORE" => $fields[ 4 ],
- "STRAND" => $fields[ 5 ],
- );
- }
- elsif ( $columns == 12 )
- {
- %entry = (
- "CHR" => $fields[ 0 ],
- "CHR_BEG" => $fields[ 1 ],
- "CHR_END" => $fields[ 2 ] - 1,
- "Q_ID" => $fields[ 3 ],
- "SCORE" => $fields[ 4 ],
- "STRAND" => $fields[ 5 ],
- "THICK_BEG" => $fields[ 6 ],
- "THICK_END" => $fields[ 7 ] - 1,
- "ITEMRGB" => $fields[ 8 ],
- "BLOCKCOUNT" => $fields[ 9 ],
- "BLOCKSIZES" => $fields[ 10 ],
- "Q_BEGS" => $fields[ 11 ],
- );
- }
- else
- {
- Maasha::Common::error( qq(Bad BED format in line->$line<-) );
- }
-
- $entry{ "REC_TYPE" } = "BED";
- $entry{ "BED_LEN" } = $entry{ "CHR_END" } - $entry{ "CHR_BEG" } + 1;
- $entry{ "BED_COLS" } = $columns;
-
- return wantarray ? %entry : \%entry;
-}
-
-
-sub bed_get_entries
-{
- # Martin A. Hansen, January 2008.
-
- # Given a path to a BED file, read in all entries
- # and return.
-
- my ( $path, # full path to BED file
- $columns, # number of columns in BED file - OPTIONAL (but is faster)
- ) = @_;
-
- # Returns a list.
-
- my ( $fh, $entry, @list );
-
- $fh = Maasha::Common::read_open( $path );
-
- while ( $entry = bed_get_entry( $fh ) ) {
- push @list, $entry;
- }
-
- close $fh;
-
- return wantarray ? @list : \@list;
-}
-
-
-sub bed_put_entry
-{
- # Martin A. Hansen, Septermber 2007.
-
- # Writes a BED entry to file.
-
- # NB, this could really be more robust!?
-
- my ( $record, # hashref
- $fh, # file handle - OPTIONAL
- $columns, # number of columns in BED file - OPTIONAL (but is faster)
- ) = @_;
-
- # Returns nothing.
-
- my ( @fields );
-
- $columns ||= 12; # max number of columns possible
-
- if ( $columns == 3 )
- {
- push @fields, $record->{ "CHR" };
- push @fields, $record->{ "CHR_BEG" };
- push @fields, $record->{ "CHR_END" } + 1;
- }
- elsif ( $columns == 4 )
- {
- $record->{ "Q_ID" } =~ s/\s+/_/g;
-
- push @fields, $record->{ "CHR" };
- push @fields, $record->{ "CHR_BEG" };
- push @fields, $record->{ "CHR_END" } + 1;
- push @fields, $record->{ "Q_ID" };
- }
- elsif ( $columns == 5 )
- {
- $record->{ "Q_ID" } =~ s/\s+/_/g;
- $record->{ "SCORE" } =~ s/\.\d*//;
-
- push @fields, $record->{ "CHR" };
- push @fields, $record->{ "CHR_BEG" };
- push @fields, $record->{ "CHR_END" } + 1;
- push @fields, $record->{ "Q_ID" };
- push @fields, $record->{ "SCORE" };
- }
- elsif ( $columns == 6 )
- {
- $record->{ "Q_ID" } =~ s/\s+/_/g;
- $record->{ "SCORE" } =~ s/\.\d*//;
-
- push @fields, $record->{ "CHR" };
- push @fields, $record->{ "CHR_BEG" };
- push @fields, $record->{ "CHR_END" } + 1;
- push @fields, $record->{ "Q_ID" };
- push @fields, $record->{ "SCORE" };
- push @fields, $record->{ "STRAND" };
- }
- else
- {
- $record->{ "Q_ID" } =~ s/\s+/_/g;
- $record->{ "SCORE" } =~ s/\.\d*//;
-
- push @fields, $record->{ "CHR" };
- push @fields, $record->{ "CHR_BEG" };
- push @fields, $record->{ "CHR_END" } + 1;
- push @fields, $record->{ "Q_ID" };
- push @fields, $record->{ "SCORE" };
- push @fields, $record->{ "STRAND" };
- push @fields, $record->{ "THICK_BEG" } if defined $record->{ "THICK_BEG" };
- push @fields, $record->{ "THICK_END" } + 1 if defined $record->{ "THICK_END" };
- push @fields, $record->{ "ITEMRGB" } if defined $record->{ "ITEMRGB" };
- push @fields, $record->{ "BLOCKCOUNT" } if defined $record->{ "BLOCKCOUNT" };
- push @fields, $record->{ "BLOCKSIZES" } if defined $record->{ "BLOCKSIZES" };
- push @fields, $record->{ "Q_BEGS" } if defined $record->{ "Q_BEGS" };
- }
-
- if ( $fh ) {
- print $fh join( "\t", @fields ), "\n";
- } else {
- print join( "\t", @fields ), "\n";
- }
-}
-
-
-sub bed_put_entries
-{
- # Martin A. Hansen, January 2008.
-
- # Write a list of BED entries.
-
- my ( $entries, # list of entries,
- $fh, # file handle - OPTIONAL
- ) = @_;
-
- # Returns nothing.
-
- map { bed_put_entry( $_, $fh ) } @{ $entries };
-}
-
-
-sub bed_analyze
-{
- # Martin A. Hansen, March 2008.
-
- # Given a bed record, analysis this to give information
- # about intron/exon sizes.
-
- my ( $entry, # BED entry
- ) = @_;
-
- # Returns hashref.
-
- my ( $i, @begs, @lens, $exon_max, $exon_min, $exon_len, $exon_tot, $intron_max, $intron_min, $intron_len, $intron_tot );
-
- $exon_max = 0;
- $exon_min = 9999999999;
- $intron_max = 0;
- $intron_min = 9999999999;
-
- $entry->{ "EXONS" } = $entry->{ "BLOCKCOUNT" };
-
- @begs = split /,/, $entry->{ "Q_BEGS" };
- @lens = split /,/, $entry->{ "BLOCKSIZES" };
-
- for ( $i = 0; $i < $entry->{ "BLOCKCOUNT" }; $i++ )
- {
- $exon_len = @lens[ $i ];
-
- $entry->{ "EXON_LEN_$i" } = $exon_len;
-
- $exon_max = $exon_len if $exon_len > $exon_max;
- $exon_min = $exon_len if $exon_len < $exon_min;
-
- $exon_tot += $exon_len;
- }
-
- $entry->{ "EXON_LEN_-1" } = $exon_len;
- $entry->{ "EXON_MAX_LEN" } = $exon_max;
- $entry->{ "EXON_MIN_LEN" } = $exon_min;
- $entry->{ "EXON_MEAN_LEN" } = int( $exon_tot / $entry->{ "EXONS" } );
-
- $entry->{ "INTRONS" } = $entry->{ "BLOCKCOUNT" } - 1;
- $entry->{ "INTRONS" } = 0 if $entry->{ "INTRONS" } < 0;
-
- if ( $entry->{ "INTRONS" } )
- {
- for ( $i = 1; $i < $entry->{ "BLOCKCOUNT" }; $i++ )
- {
- $intron_len = @begs[ $i ] - ( @begs[ $i - 1 ] + @lens[ $i - 1 ] );
-
- $entry->{ "INTRON_LEN_" . ( $i - 1 ) } = $intron_len;