# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+use warnings;
use strict;
use vars qw ( @ISA @EXPORT );
use Maasha::Matrix;
use constant {
- FS_CHR_BEG => 0,
- FS_NEXT_CHR_BEG => 1,
- FS_CHR_END => 2,
- FS_INDEX_BEG => 3,
- FS_INDEX_LEN => 4,
-
CHR => 0,
CHR_BEG => 1,
CHR_END => 2,
STRAND => 5,
THICK_BEG => 6,
THICK_END => 7,
- ITEMRGB => 8,
- BLOCKCOUNT => 9,
- BLOCKSIZES => 10,
+ COLOR => 8,
+ BLOCK_COUNT => 9,
+ BLOCK_LENS => 10,
Q_BEGS => 11,
};
@ISA = qw( Exporter );
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> BED format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-# http://genome.ucsc.edu/goldenPath/help/hgTracksHelp.html#BED
-
-
-sub bed_entry_get_array
-{
- # Martin A. Hansen, September 2008.
-
- # Reads a BED entry given a filehandle.
-
- # This is a new _faster_ BED entry parser that
- # uses arrays and not hashrefs.
-
- # IMPORTANT! This function does not correct the
- # BED_END position that is kept the way UCSC
- # does.
-
- my ( $fh, # file handle
- $cols, # columns to read - OPTIONAL (3,4,5,6, or 12)
- ) = @_;
-
- # Returns a list.
-
- my ( $line, @entry );
-
- $line = <$fh>;
-
- $line =~ tr/\n\r//d; # some people have carriage returns in their BED files -> Grrrr
-
- return if not defined $line;
-
- if ( not defined $cols ) {
- $cols = 1 + $line =~ tr/\t//;
- }
-
- @entry = split "\t", $line, $cols + 1;
-
- pop @entry if scalar @entry > $cols;
-
- return wantarray ? @entry : \@entry;
-}
-
-
-sub bed_get_entry
-{
- # Martin A. Hansen, December 2007.
-
- # Reads a bed entry given a filehandle.
-
- my ( $fh, # file handle
- $columns, # number of BED columns to read - OPTIONAL
- ) = @_;
-
- # Returns hashref.
-
- my ( $line, @fields, %entry );
-
- $line = <$fh>;
-
- $line =~ tr/\n\r//d; # some people have carriage returns in their BED files -> Grrrr
-
- return if not defined $line;
-
- @fields = split "\t", $line;
-
- $columns ||= scalar @fields;
-
- if ( $columns == 3 )
- {
- %entry = (
- "CHR" => $fields[ 0 ],
- "CHR_BEG" => $fields[ 1 ],
- "CHR_END" => $fields[ 2 ] - 1,
- );
- }
- elsif ( $columns == 4 )
- {
- %entry = (
- "CHR" => $fields[ 0 ],
- "CHR_BEG" => $fields[ 1 ],
- "CHR_END" => $fields[ 2 ] - 1,
- "Q_ID" => $fields[ 3 ],
- );
- }
- elsif ( $columns == 5 )
- {
- %entry = (
- "CHR" => $fields[ 0 ],
- "CHR_BEG" => $fields[ 1 ],
- "CHR_END" => $fields[ 2 ] - 1,
- "Q_ID" => $fields[ 3 ],
- "SCORE" => $fields[ 4 ],
- );
- }
- elsif ( $columns == 6 )
- {
- %entry = (
- "CHR" => $fields[ 0 ],
- "CHR_BEG" => $fields[ 1 ],
- "CHR_END" => $fields[ 2 ] - 1,
- "Q_ID" => $fields[ 3 ],
- "SCORE" => $fields[ 4 ],
- "STRAND" => $fields[ 5 ],
- );
- }
- elsif ( $columns == 12 )
- {
- %entry = (
- "CHR" => $fields[ 0 ],
- "CHR_BEG" => $fields[ 1 ],
- "CHR_END" => $fields[ 2 ] - 1,
- "Q_ID" => $fields[ 3 ],
- "SCORE" => $fields[ 4 ],
- "STRAND" => $fields[ 5 ],
- "THICK_BEG" => $fields[ 6 ],
- "THICK_END" => $fields[ 7 ] - 1,
- "ITEMRGB" => $fields[ 8 ],
- "BLOCKCOUNT" => $fields[ 9 ],
- "BLOCKSIZES" => $fields[ 10 ],
- "Q_BEGS" => $fields[ 11 ],
- );
- }
- else
- {
- Maasha::Common::error( qq(Bad BED format in line->$line<-) );
- }
-
- $entry{ "REC_TYPE" } = "BED";
- $entry{ "BED_LEN" } = $entry{ "CHR_END" } - $entry{ "CHR_BEG" } + 1;
- $entry{ "BED_COLS" } = $columns;
-
- return wantarray ? %entry : \%entry;
-}
-
-
-sub bed_get_entries
-{
- # Martin A. Hansen, January 2008.
-
- # Given a path to a BED file, read in all entries
- # and return.
-
- my ( $path, # full path to BED file
- $columns, # number of columns in BED file - OPTIONAL (but is faster)
- ) = @_;
-
- # Returns a list.
-
- my ( $fh, $entry, @list );
-
- $fh = Maasha::Common::read_open( $path );
-
- while ( $entry = bed_get_entry( $fh ) ) {
- push @list, $entry;
- }
-
- close $fh;
-
- return wantarray ? @list : \@list;
-}
-
-
-sub bed_entry_put_array
-{
- # Martin A. Hansen, Septermber 2008.
-
- # Writes a BED entry array to file.
-
- # IMPORTANT! This function does not correct the
- # BED_END position that is assumed to be in the
- # UCSC positions scheme.
-
- my ( $record, # list
- $fh, # file handle - OPTIONAL
- $cols, # number of columns in BED file - OPTIONAL
- ) = @_;
-
- # Returns nothing.
-
- $fh = \*STDOUT if not defined $fh;
-
- if ( defined $cols ) {
- print $fh join( "\t", @{ $record }[ 0 .. $cols - 1 ] ), "\n";
- } else {
- print $fh join( "\t", @{ $record } ), "\n";
- }
-}
-
-
-sub bed_put_entry
-{
- # Martin A. Hansen, Septermber 2007.
-
- # Writes a BED entry to file.
-
- # NB, this could really be more robust!?
-
- my ( $record, # hashref
- $fh, # file handle - OPTIONAL
- $columns, # number of columns in BED file - OPTIONAL (but is faster)
- ) = @_;
-
- # Returns nothing.
-
- my ( @fields );
-
- $columns ||= 12; # max number of columns possible
-
- if ( $columns == 3 )
- {
- push @fields, $record->{ "CHR" };
- push @fields, $record->{ "CHR_BEG" };
- push @fields, $record->{ "CHR_END" } + 1;
- }
- elsif ( $columns == 4 )
- {
- $record->{ "Q_ID" } =~ s/\s+/_/g;
-
- push @fields, $record->{ "CHR" };
- push @fields, $record->{ "CHR_BEG" };
- push @fields, $record->{ "CHR_END" } + 1;
- push @fields, $record->{ "Q_ID" };
- }
- elsif ( $columns == 5 )
- {
- $record->{ "Q_ID" } =~ s/\s+/_/g;
- $record->{ "SCORE" } =~ s/\.\d*//;
-
- push @fields, $record->{ "CHR" };
- push @fields, $record->{ "CHR_BEG" };
- push @fields, $record->{ "CHR_END" } + 1;
- push @fields, $record->{ "Q_ID" };
- push @fields, $record->{ "SCORE" };
- }
- elsif ( $columns == 6 )
- {
- $record->{ "Q_ID" } =~ s/\s+/_/g;
- $record->{ "SCORE" } =~ s/\.\d*//;
-
- push @fields, $record->{ "CHR" };
- push @fields, $record->{ "CHR_BEG" };
- push @fields, $record->{ "CHR_END" } + 1;
- push @fields, $record->{ "Q_ID" };
- push @fields, $record->{ "SCORE" };
- push @fields, $record->{ "STRAND" };
- }
- else
- {
- $record->{ "Q_ID" } =~ s/\s+/_/g;
- $record->{ "SCORE" } =~ s/\.\d*//;
-
- push @fields, $record->{ "CHR" };
- push @fields, $record->{ "CHR_BEG" };
- push @fields, $record->{ "CHR_END" } + 1;
- push @fields, $record->{ "Q_ID" };
- push @fields, $record->{ "SCORE" };
- push @fields, $record->{ "STRAND" };
- push @fields, $record->{ "THICK_BEG" } if defined $record->{ "THICK_BEG" };
- push @fields, $record->{ "THICK_END" } + 1 if defined $record->{ "THICK_END" };
- push @fields, $record->{ "ITEMRGB" } if defined $record->{ "ITEMRGB" };
- push @fields, $record->{ "BLOCKCOUNT" } if defined $record->{ "BLOCKCOUNT" };
- push @fields, $record->{ "BLOCKSIZES" } if defined $record->{ "BLOCKSIZES" };
- push @fields, $record->{ "Q_BEGS" } if defined $record->{ "Q_BEGS" };
- }
-
- if ( $fh ) {
- print $fh join( "\t", @fields ), "\n";
- } else {
- print join( "\t", @fields ), "\n";
- }
-}
-
-
-sub bed_put_entries
-{
- # Martin A. Hansen, January 2008.
-
- # Write a list of BED entries.
-
- my ( $entries, # list of entries,
- $fh, # file handle - OPTIONAL
- ) = @_;
-
- # Returns nothing.
-
- map { bed_put_entry( $_, $fh ) } @{ $entries };
-}
-
-
-sub bed_analyze
-{
- # Martin A. Hansen, March 2008.
-
- # Given a bed record, analysis this to give information
- # about intron/exon sizes.
-
- my ( $entry, # BED entry
- ) = @_;
-
- # Returns hashref.
-
- my ( $i, @begs, @lens, $exon_max, $exon_min, $exon_len, $exon_tot, $intron_max, $intron_min, $intron_len, $intron_tot );
-
- $exon_max = 0;
- $exon_min = 9999999999;
- $intron_max = 0;
- $intron_min = 9999999999;
-
- $entry->{ "EXONS" } = $entry->{ "BLOCKCOUNT" };
-
- @begs = split /,/, $entry->{ "Q_BEGS" };
- @lens = split /,/, $entry->{ "BLOCKSIZES" };
-
- for ( $i = 0; $i < $entry->{ "BLOCKCOUNT" }; $i++ )
- {
- $exon_len = @lens[ $i ];
-
- $entry->{ "EXON_LEN_$i" } = $exon_len;
-
- $exon_max = $exon_len if $exon_len > $exon_max;
- $exon_min = $exon_len if $exon_len < $exon_min;
-
- $exon_tot += $exon_len;
- }
-
- $entry->{ "EXON_LEN_-1" } = $exon_len;
- $entry->{ "EXON_MAX_LEN" } = $exon_max;
- $entry->{ "EXON_MIN_LEN" } = $exon_min;
- $entry->{ "EXON_MEAN_LEN" } = int( $exon_tot / $entry->{ "EXONS" } );
-
- $entry->{ "INTRONS" } = $entry->{ "BLOCKCOUNT" } - 1;
- $entry->{ "INTRONS" } = 0 if $entry->{ "INTRONS" } < 0;
-
- if ( $entry->{ "INTRONS" } )
- {
- for ( $i = 1; $i < $entry->{ "BLOCKCOUNT" }; $i++ )
- {
- $intron_len = @begs[ $i ] - ( @begs[ $i - 1 ] + @lens[ $i - 1 ] );
-
- $entry->{ "INTRON_LEN_" . ( $i - 1 ) } = $intron_len;
-
- $intron_max = $intron_len if $intron_len > $intron_max;
- $intron_min = $intron_len if $intron_len < $intron_min;
-
- $intron_tot += $intron_len;
- }
-
- $entry->{ "INTRON_LEN_-1" } = $intron_len;
- $entry->{ "INTRON_MAX_LEN" } = $intron_max;
- $entry->{ "INTRON_MIN_LEN" } = $intron_min;
- $entry->{ "INTRON_MEAN_LEN" } = int( $intron_tot / $entry->{ "INTRONS" } );
- }
-
- return wantarray ? %{ $entry } : $entry;
-}
-
-
-sub bed_sort
-{
- # Martin A. Hansen, Septermber 2008
-
- # Sorts a BED file using the c program
- # "bed_sort" specifing a sort mode:
-
- # 1: chr AND chr_beg.
- # 2: chr AND strand AND chr_beg.
- # 3: chr_beg.
- # 4: strand AND chr_beg.
-
- my ( $bed_file, # BED file to sort
- $sort_mode, # See above.
- $cols, # Number of columns in BED file
- ) = @_;
-
- &Maasha::Common::run( "bed_sort", "--sort $sort_mode --cols $cols $bed_file" );
-}
-
-
-sub bed_split_to_files
-{
- # Martin A. Hansen, Septermber 2008
-
- # Given a list of BED files, split these
- # into temporary files based on the chromosome
- # name. Returns a list of the temporary files.
-
- my ( $bed_files, # list of BED files to split
- $cols, # number of columns
- $tmp_dir, # temporary directory
- ) = @_;
-
- # Returns a list.
-
- my ( $bed_file, $fh_in, $entry, $key, %fh_hash, @tmp_files );
-
- foreach $bed_file ( @{ $bed_files } )
- {
- $fh_in = Maasha::Common::read_open( $bed_file );
-
- while ( $entry = bed_entry_get_array( $fh_in, $cols ) )
- {
- $key = $entry->[ CHR ];
-
- $fh_hash{ $key } = Maasha::Common::write_open( "$tmp_dir/$key.temp" ) if not exists $fh_hash{ $key };
-
- bed_entry_put_array( $entry, $fh_hash{ $key } );
- }
-
- close $fh_in;
- }
-
- foreach $key ( sort keys %fh_hash )
- {
- push @tmp_files, "$tmp_dir/$key.temp";
-
- close $fh_hash{ $key };
- }
-
- return wantarray ? @tmp_files : \@tmp_files;
-}
-
-
-sub bed_merge_entries
-{
- # Martin A. Hansen, February 2008.
-
- # Merge a list of given BED entries in one big entry.
-
- my ( $entries, # list of BED entries to be merged
- ) = @_;
-
- # Returns hash.
-
- my ( $i, @q_ids, @q_begs, @blocksizes, @new_q_begs, @new_blocksizes, %new_entry );
-
- @{ $entries } = sort { $a->{ "CHR_BEG" } <=> $b->{ "CHR_BEG" } } @{ $entries };
-
- for ( $i = 0; $i < @{ $entries }; $i++ )
- {
- Maasha::Common::error( qq(Attempted merge of BED entries from different chromosomes) ) if $entries->[ 0 ]->{ "CHR" } ne $entries->[ $i ]->{ "CHR" };
- Maasha::Common::error( qq(Attempted merge of BED entries from different strands) ) if $entries->[ 0 ]->{ "STRAND" } ne $entries->[ $i ]->{ "STRAND" };
-
- push @q_ids, $entries->[ $i ]->{ "Q_ID" } || sprintf( "ID%06d", $i );
-
- if ( exists $entries->[ $i ]->{ "Q_BEGS" } )
- {
- @q_begs = split ",", $entries->[ $i ]->{ "Q_BEGS" };
- @blocksizes = split ",", $entries->[ $i ]->{ "BLOCKSIZES" };
- }
- else
- {
- @q_begs = 0;
- @blocksizes = $entries->[ $i ]->{ "CHR_END" } - $entries->[ $i ]->{ "CHR_BEG" } + 1;
- }
-
- map { $_ += $entries->[ $i ]->{ "CHR_BEG" } } @q_begs;
-
- push @new_q_begs, @q_begs;
- push @new_blocksizes, @blocksizes;
- }
-
- map { $_ -= $entries->[ 0 ]->{ "CHR_BEG" } } @new_q_begs;
-
- %new_entry = (
- CHR => $entries->[ 0 ]->{ "CHR" },
- CHR_BEG => $entries->[ 0 ]->{ "CHR_BEG" },
- CHR_END => $entries->[ -1 ]->{ "CHR_END" },
- REC_TYPE => "BED",
- BED_LEN => $entries->[ -1 ]->{ "CHR_END" } - $entries->[ 0 ]->{ "CHR_BEG" } + 1,
- BED_COLS => 12,
- Q_ID => join( ":", @q_ids ),
- SCORE => 999,
- STRAND => $entries->[ 0 ]->{ "STRAND" } || "+",
- THICK_BEG => $entries->[ 0 ]->{ "THICK_BEG" } || $entries->[ 0 ]->{ "CHR_BEG" },
- THICK_END => $entries->[ -1 ]->{ "THICK_END" } || $entries->[ -1 ]->{ "CHR_END" },
- ITEMRGB => "0,0,0",
- BLOCKCOUNT => scalar @new_q_begs,
- BLOCKSIZES => join( ",", @new_blocksizes ),
- Q_BEGS => join( ",", @new_q_begs ),
- );
-
- return wantarray ? %new_entry : \%new_entry;
-}
-
-
-sub bed_split_entry
-{
- # Martin A. Hansen, February 2008.
-
- # Splits a given BED entry into a list of blocks,
- # which are returned. A list of 6 column BED entry is returned.
-
- my ( $entry, # BED entry hashref
- ) = @_;
-
- # Returns a list.
-
- my ( @q_begs, @blocksizes, $block, @blocks, $i );
-
- if ( exists $entry->{ "BLOCKCOUNT" } )
- {
- @q_begs = split ",", $entry->{ "Q_BEGS" };
- @blocksizes = split ",", $entry->{ "BLOCKSIZES" };
-
- for ( $i = 0; $i < @q_begs; $i++ )
- {
- undef $block;
-
- $block->{ "CHR" } = $entry->{ "CHR" };
- $block->{ "CHR_BEG" } = $entry->{ "CHR_BEG" } + $q_begs[ $i ];
- $block->{ "CHR_END" } = $entry->{ "CHR_BEG" } + $q_begs[ $i ] + $blocksizes[ $i ] - 1;
- $block->{ "Q_ID" } = $entry->{ "Q_ID" } . sprintf( "_%03d", $i );
- $block->{ "SCORE" } = $entry->{ "SCORE" };
- $block->{ "STRAND" } = $entry->{ "STRAND" };
- $block->{ "BED_LEN" } = $block->{ "CHR_END" } - $block->{ "CHR_BEG" } + 1,
- $block->{ "BED_COLS" } = 6;
- $block->{ "REC_TYPE" } = "BED";
-
- push @blocks, $block;
- }
- }
- else
- {
- @blocks = @{ $entry };
- }
-
- return wantarray ? @blocks : \@blocks;
-}
-
-
-
-sub bed_overlap
-{
- # Martin A. Hansen, February 2008.
-
- # Checks if two BED entries overlap and
- # return 1 if so - else 0;
-
- my ( $entry1, # hashref
- $entry2, # hashref
- $no_strand, # don't check strand flag - OPTIONAL
- ) = @_;
-
- # Return bolean.
-
- return 0 if $entry1->{ "CHR" } ne $entry2->{ "CHR" };
- return 0 if $entry1->{ "STRAND" } ne $entry2->{ "STRAND" };
-
- if ( $entry1->{ "CHR_END" } < $entry2->{ "CHR_BEG" } or $entry1->{ "CHR_BEG" } > $entry2->{ "CHR_END" } ) {
- return 0;
- } else {
- return 1;
- }
-}
-
-
-sub bed_upload_to_ucsc
-{
- # Martin A. Hansen, September 2007.
-
- # Upload a BED file to the UCSC database.
-
- my ( $tmp_dir, # temporary directory
- $file, # file to upload,
- $options, # argument hashref
- $append, # flag indicating table should be appended
- ) = @_;
-
- # Returns nothing.
-
- my ( $args, $table, $sql_file, $fh_out, $fh_in );
-
- if ( $append ) {
- $args = join " ", $options->{ "database" }, $options->{ "table" }, "-tmpDir=$tmp_dir", "-oldTable", $file;
- } else {
- $args = join " ", $options->{ "database" }, $options->{ "table" }, "-tmpDir=$tmp_dir", $file;
- }
-
- if ( $options->{ "table" } =~ /rnaSecStr/ )
- {
- $table = $options->{ "table" };
-
- print qq(uploading secondary structure table:"$table"\n) if $options->{ "verbose" };
-
- $sql_file = "$tmp_dir/upload_RNA_SS.sql";
-
- $fh_out = Maasha::Common::write_open( $sql_file );
-
- print $fh_out qq(
-CREATE TABLE $table (
- bin smallint not null, # Bin number for browser speedup
- chrom varchar(255) not null, # Chromosome or FPC contig
- chromStart int unsigned not null, # Start position in chromosome
- chromEnd int unsigned not null, # End position in chromosome
- name varchar(255) not null, # Name of item
- score int unsigned not null, # Score from 0-1000
- strand char(1) not null, # + or -
- size int unsigned not null, # Size of element.
- secStr longblob not null, # Parentheses and '.'s which define the secondary structure
- conf longblob not null, # Confidence of secondary-structure annotation per position (0.0-1.0).
- #Indices
- INDEX(name(16)),
- INDEX(chrom(8), bin),
- INDEX(chrom(8), chromStart)
-);
- );
-
- close $fh_out;
-
- Maasha::Common::run( "hgLoadBed", "-notItemRgb -sqlTable=$sql_file $options->{ 'database' } $options->{ 'table' } -tmpDir=$tmp_dir $file > /dev/null 2>&1" );
-
- unlink $sql_file;
- }
- else
- {
- Maasha::Common::run( "hgLoadBed", "$args > /dev/null 2>&1" );
- }
-}
-
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PSL format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-sub psl_get_entry
-{
- # Martin A. Hansen, August 2008.
-
- # Reads PSL next entry from a PSL file and returns a record.
-
- my ( $fh, # file handle of PSL filefull path to PSL file
- ) = @_;
-
- # Returns hashref.
-
- my ( $line, @fields, %record );
-
- while ( $line = <$fh> )
- {
- chomp $line;
-
- @fields = split "\t", $line;
-
- if ( scalar @fields == 21 )
- {
- %record = (
- REC_TYPE => "PSL",
- MATCHES => $fields[ 0 ],
- MISMATCHES => $fields[ 1 ],
- REPMATCHES => $fields[ 2 ],
- NCOUNT => $fields[ 3 ],
- QNUMINSERT => $fields[ 4 ],
- QBASEINSERT => $fields[ 5 ],
- SNUMINSERT => $fields[ 6 ],
- SBASEINSERT => $fields[ 7 ],
- STRAND => $fields[ 8 ],
- Q_ID => $fields[ 9 ],
- Q_LEN => $fields[ 10 ],
- Q_BEG => $fields[ 11 ],
- Q_END => $fields[ 12 ] - 1,
- S_ID => $fields[ 13 ],
- S_LEN => $fields[ 14 ],
- S_BEG => $fields[ 15 ],
- S_END => $fields[ 16 ] - 1,
- BLOCKCOUNT => $fields[ 17 ],
- BLOCKSIZES => $fields[ 18 ],
- Q_BEGS => $fields[ 19 ],
- S_BEGS => $fields[ 20 ],
- );
-
- $record{ "SCORE" } = $record{ "MATCHES" } + int( $record{ "REPMATCHES" } / 2 ) - $record{ "MISMATCHES" } - $record{ "QNUMINSERT" } - $record{ "SNUMINSERT" };
- $record{ "SPAN" } = $fields[ 12 ] - $fields[ 11 ];
-
- return wantarray ? %record : \%record;
- }
- }
-
- return undef;
-}
-
-
-sub psl_get_entries
-{
- # Martin A. Hansen, February 2008.
-
- # Reads PSL entries and returns a list of records.
-
- my ( $path, # full path to PSL file
- ) = @_;
-
- # Returns hashref.
-
- my ( $fh, @lines, @fields, $i, %record, @records );
-
- $fh = Maasha::Common::read_open( $path );
-
- @lines = <$fh>;
-
- close $fh;
-
- chomp @lines;
-
- for ( $i = 5; $i < @lines; $i++ )
- {
- @fields = split "\t", $lines[ $i ];
-
- Maasha::Common::error( qq(Bad PSL format in file "$path") ) if not @fields == 21;
-
- undef %record;
-
- %record = (
- REC_TYPE => "PSL",
- MATCHES => $fields[ 0 ],
- MISMATCHES => $fields[ 1 ],
- REPMATCHES => $fields[ 2 ],
- NCOUNT => $fields[ 3 ],
- QNUMINSERT => $fields[ 4 ],
- QBASEINSERT => $fields[ 5 ],
- SNUMINSERT => $fields[ 6 ],
- SBASEINSERT => $fields[ 7 ],
- STRAND => $fields[ 8 ],
- Q_ID => $fields[ 9 ],
- Q_LEN => $fields[ 10 ],
- Q_BEG => $fields[ 11 ],
- Q_END => $fields[ 12 ] - 1,
- S_ID => $fields[ 13 ],
- S_LEN => $fields[ 14 ],
- S_BEG => $fields[ 15 ],
- S_END => $fields[ 16 ] - 1,
- BLOCKCOUNT => $fields[ 17 ],
- BLOCKSIZES => $fields[ 18 ],
- Q_BEGS => $fields[ 19 ],
- S_BEGS => $fields[ 20 ],
- );
-
- $record{ "SCORE" } = $record{ "MATCHES" } + int( $record{ "REPMATCHES" } / 2 ) - $record{ "MISMATCHES" } - $record{ "QNUMINSERT" } - $record{ "SNUMINSERT" };
-
- push @records, { %record };
- }
-
- return wantarray ? @records : \@records;
-}
-
-
-sub psl_put_header
-{
- # Martin A. Hansen, September 2007.
-
- # Write a PSL header to file.
-
- my ( $fh, # file handle - OPTIONAL
- ) = @_;
-
- # Returns nothing.
-
- $fh = \*STDOUT if not $fh;
-
- print $fh qq(psLayout version 3
-match mis- rep. N's Q gap Q gap T gap T gap strand Q Q Q Q T T T T block blockSizes qStart match match count bases count bases name size start end name size start end count
----------------------------------------------------------------------------------------------------------------------------------------------------------------
-);
-}
-
-
-sub psl_put_entry
-{
- # Martin A. Hansen, September 2007.
-
- # Write a PSL entry to file.
-
- my ( $record, # hashref
- $fh, # file handle - OPTIONAL
- ) = @_;
-
- # Returns nothing.
-
- $fh = \*STDOUT if not $fh;
-
- my @output;
-
- push @output, $record->{ "MATCHES" };
- push @output, $record->{ "MISMATCHES" };
- push @output, $record->{ "REPMATCHES" };
- push @output, $record->{ "NCOUNT" };
- push @output, $record->{ "QNUMINSERT" };
- push @output, $record->{ "QBASEINSERT" };
- push @output, $record->{ "SNUMINSERT" };
- push @output, $record->{ "SBASEINSERT" };
- push @output, $record->{ "STRAND" };
- push @output, $record->{ "Q_ID" };
- push @output, $record->{ "Q_LEN" };
- push @output, $record->{ "Q_BEG" };
- push @output, $record->{ "Q_END" } + 1;
- push @output, $record->{ "S_ID" };
- push @output, $record->{ "S_LEN" };
- push @output, $record->{ "S_BEG" };
- push @output, $record->{ "S_END" } + 1;
- push @output, $record->{ "BLOCKCOUNT" };
- push @output, $record->{ "BLOCKSIZES" };
- push @output, $record->{ "Q_BEGS" };
- push @output, $record->{ "S_BEGS" };
-
- print $fh join( "\t", @output ), "\n";
-}
-
-
-sub psl_upload_to_ucsc
-{
- # Martin A. Hansen, September 2007.
-
- # Upload a PSL file to the UCSC database.
-
- my ( $file, # file to upload,
- $options, # argument hashref
- $append, # flag indicating table should be appended
- ) = @_;
-
- # Returns nothing.
-
- my ( $args );
-
- if ( $append ) {
- $args = join " ", $options->{ "database" }, "-table=$options->{ 'table' }", "-clientLoad", "-append", $file;
- } else {
- $args = join " ", $options->{ "database" }, "-table=$options->{ 'table' }", "-clientLoad", $file;
- }
-
- Maasha::Common::run( "hgLoadPsl", "$args > /dev/null 2>&1" );
-}
-
-
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> TRACK FILE <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
}
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> fixedStep format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-sub fixedstep_get_entry
-{
- # Martin A. Hansen, December 2007.
-
- # Given a file handle to a PhastCons file get the
- # next entry which is all the lines after a "fixedStep"
- # line and until the next "fixedStep" line or EOF.
-
- my ( $fh, # filehandle
- ) = @_;
-
- # Returns a list of lines
-
- my ( $entry, @lines );
-
- local $/ = "\nfixedStep ";
-
- $entry = <$fh>;
-
- chomp $entry;
-
- @lines = split "\n", $entry;
-
- return if @lines == 0;
-
- $lines[ 0 ] =~ s/fixedStep?\s*//;
-
- return wantarray ? @lines : \@lines;
-}
-
-
-sub fixedstep_index_create
-{
- # Martin A. Hansen, January 2008.
-
- # Indexes a concatenated fixedStep file.
- # The index consists of a hash with chromosomes as keys,
- # and a list of [ chr_beg, next_chr_beg, chr_end, index_beg, index_len ] as values.
-
- my ( $path, # path to fixedStep file
- ) = @_;
-
- # Returns a hashref
-
- my ( $fh, $pos, $index_beg, $index_len, $entry, $locator, $chr, $step, $beg, $end, $len, %index, $i );
-
- $fh = Maasha::Common::read_open( $path );
-
- $pos = 0;
-
- while ( $entry = Maasha::UCSC::fixedstep_get_entry( $fh ) )
- {
- $locator = shift @{ $entry };
-
- if ( $locator =~ /chrom=([^ ]+) start=(\d+) step=(\d+)/ )
- {
- $chr = $1;
- $beg = $2 - 1; # fixedStep files are 1-based
- $step = $3;
- }
- else
- {
- Maasha::Common::error( qq(Could not parse locator: $locator) );
- }
-
- $pos += length( $locator ) + 11;
-
- $index_beg = $pos;
-
-# map { $pos += length( $_ ) + 1 } @{ $entry };
-
- $pos += 6 * scalar @{ $entry };
-
- $index_len = $pos - $index_beg;
-
- push @{ $index{ $chr } }, [ $beg, undef, $beg + scalar @{ $entry } - 1, $index_beg, $index_len ];
- }
-
- close $fh;
-
- foreach $chr ( keys %index )
- {
- for ( $i = 0; $i < @{ $index{ $chr } } - 1; $i++ ) {
- $index{ $chr }->[ $i ]->[ FS_NEXT_CHR_BEG ] = $index{ $chr }->[ $i + 1 ]->[ 0 ];
- }
-
- $index{ $chr }->[ -1 ]->[ FS_NEXT_CHR_BEG ] = $index{ $chr }->[ -1 ]->[ FS_CHR_END ] + 1;
- }
-
- return wantarray ? %index : \%index;
-}
-
-
-sub fixedstep_index_store
-{
- # Martin A. Hansen, January 2008.
-
- # Writes a fixedStep index to binary file.
-
- my ( $path, # full path to file
- $index, # list with index
- ) = @_;
-
- # returns nothing
-
- Maasha::Common::file_store( $path, $index );
-}
-
-
-sub fixedstep_index_retrieve
-{
- # Martin A. Hansen, January 2008.
-
- # Retrieves a fixedStep index from binary file.
-
- my ( $path, # full path to file
- ) = @_;
-
- # returns list
-
- my $index;
-
- $index = Maasha::Common::file_retrieve( $path );
-
- return wantarray ? %{ $index } : $index;
-}
-
-
-sub fixedstep_index_lookup
-{
- # Martin A. Hansen, January 2008.
-
- # Retrieve fixedStep scores from a indexed
- # fixedStep file given a chromosome and
- # begin and end positions.
-
- my ( $index, # data structure
- $fh, # filehandle to datafile
- $chr, # chromosome
- $chr_beg, # chromosome beg
- $chr_end, # chromosome end
- $flank, # include flanking region - OPTIONAL
- ) = @_;
-
- # Returns a list
-
- my ( $index_beg, $index_end, $i, $c, $beg, $end, @vals, $scores );
-
- $flank ||= 0;
-
- $chr_beg -= $flank;
- $chr_end += $flank;
-
- # print "chr_beg->$chr_beg chr_end->$chr_end flank->$flank\n";
-
- if ( exists $index->{ $chr } )
- {
- $index_beg = Maasha::Matrix::interval_search( $index->{ $chr }, 0, 1, $chr_beg );
-
- if ( $index_beg < 0 ) {
- Maasha::Common::error( qq(Index search failed - begin index position doesn't exists: $chr_beg) );
- }
-
- if ( $chr_end < $index->{ $chr }->[ $index_beg ]->[ 1 ] )
- {
- $index_end = $index_beg;
- }
- else
- {
- $index_end = Maasha::Matrix::interval_search( $index->{ $chr }, 0, 1, $chr_end );
-
- if ( $index_end < 0 ) {
- Maasha::Common::error( qq(Index search failed - end index position doesn't exists: $chr_end) );
- }
- }
-
- map { $scores->[ $_ ] = 0 } 0 .. $chr_end - $chr_beg;
-
- if ( $index_beg == $index_end )
- {
- $beg = Maasha::Calc::max( $chr_beg, $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] );
- $end = Maasha::Calc::min( $chr_end, $index->{ $chr }->[ $index_end ]->[ FS_CHR_END ] );
-
- if ( $beg <= $index->{ $chr }->[ $index_beg ]->[ FS_CHR_END ] and $end >= $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] )
- {
- @vals = split "\n", Maasha::Common::file_read(
- $fh,
- $index->{ $chr }->[ $index_beg ]->[ FS_INDEX_BEG ] + 6 * ( $beg - $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] ),
- 6 * ( $end - $beg + 1 ),
- );
- }
-
- for ( $c = 0; $c < @vals; $c++ ) {
- $scores->[ $c + $beg - $chr_beg ] = $vals[ $c ];
- }
- }
- else
- {
- $beg = Maasha::Calc::max( $chr_beg, $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] );
-
-# print Dumper( $beg, $index->{ $chr }->[ $index_beg ] );
-# print Dumper( "next", $index->{ $chr }->[ $index_beg ]->[ FS_NEXT_CHR_BEG ] );
-
- # beg next
- # v v
- # |||||||||.......
-
- if ( $beg <= $index->{ $chr }->[ $index_beg ]->[ FS_CHR_END ] )
- {
- @vals = split "\n", Maasha::Common::file_read(
- $fh,
- $index->{ $chr }->[ $index_beg ]->[ FS_INDEX_BEG ] + 6 * ( $beg - $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] ),
- 6 * ( $index->{ $chr }->[ $index_beg ]->[ FS_CHR_END ] - $beg + 1 ),
- );
-
- for ( $c = 0; $c < @vals; $c++ ) {
- $scores->[ $c + $beg - $chr_beg ] = $vals[ $c ];
- }
- }
-
- $end = Maasha::Calc::min( $chr_end, $index->{ $chr }->[ $index_end ]->[ FS_CHR_END ] );
-
- if ( $end <= $index->{ $chr }->[ $index_end ]->[ FS_CHR_END ] )
- {
- @vals = split "\n", Maasha::Common::file_read(
- $fh,
- $index->{ $chr }->[ $index_end ]->[ FS_INDEX_BEG ],
- 6 * ( $end - $index->{ $chr }->[ $index_end ]->[ FS_CHR_BEG ] + 1 ),
- );
-
- for ( $c = 0; $c < @vals; $c++ ) {
- $scores->[ $c + $index->{ $chr }->[ $index_end ]->[ FS_CHR_BEG ] - $chr_beg ] = $vals[ $c ];
- }
- }
-
- for ( $i = $index_beg + 1; $i <= $index_end - 1; $i++ )
- {
- @vals = split "\n", Maasha::Common::file_read(
- $fh,
- $index->{ $chr }->[ $i ]->[ FS_INDEX_BEG ],
- 6 * ( $index->{ $chr }->[ $i ]->[ FS_CHR_END ] - $index->{ $chr }->[ $i ]->[ FS_CHR_BEG ] + 1 ),
- );
-
- for ( $c = 0; $c < @vals; $c++ ) {
- $scores->[ $c + $index->{ $chr }->[ $i ]->[ FS_CHR_BEG ] - $chr_beg ] = $vals[ $c ];
- }
- }
- }
- }
- else
- {
- Maasha::Common::error( qq(Chromosome "$chr" was not found in index) );
- }
-
- return wantarray ? @{ $scores } : $scores;
-}
-
-
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PhastCons format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
# Given a PhastCons entry converts this to a
# list of super blocks.
+# $options->{ "min" } ||= 10;
+# $options->{ "dist" } ||= 25;
+# $options->{ "threshold" } ||= 0.8;
+# $options->{ "gap" } ||= 5;
+
my ( $lines, # list of lines
$args, # argument hash
) = @_;
$lens[ -1 ]++;
push @entries, {
- CHR => $super_block->[ 0 ]->{ "CHR" },
- CHR_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
- CHR_END => $super_block->[ -1 ]->{ "CHR_END" },
- Q_ID => "Q_ID",
- SCORE => 100,
- STRAND => "+",
- THICK_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
- THICK_END => $super_block->[ -1 ]->{ "CHR_END" } + 1,
- ITEMRGB => "0,200,100",
- BLOCKCOUNT => scalar @{ $super_block },
- BLOCKSIZES => join( ",", @lens ),
- Q_BEGS => join( ",", @begs ),
+ CHR => $super_block->[ 0 ]->{ "CHR" },
+ CHR_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
+ CHR_END => $super_block->[ -1 ]->{ "CHR_END" },
+ Q_ID => "Q_ID",
+ SCORE => 100,
+ STRAND => "+",
+ THICK_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
+ THICK_END => $super_block->[ -1 ]->{ "CHR_END" } + 1,
+ COLOR => 0,
+ BLOCK_COUNT => scalar @{ $super_block },
+ BLOCK_LENS => join( ",", @lens ),
+ Q_BEGS => join( ",", @begs ),
};
undef @begs;
# Normalizes a list of lists with PhastCons scores,
# in such a way that each list contains the same number
- # or PhastCons scores.
+ # of PhastCons scores.
my ( $AoA, # AoA with PhastCons scores
) = @_;
# splice @{ $list }, $pos, 0, "X";
}
- die qq(ERROR: bad inflate\n) if scalar @{ $list } != $len + $diff;
+ die qq(ERROR: Bad inflate\n) if scalar @{ $list } != $len + $diff;
}
splice @{ $list }, $pos, 1;
}
- die qq(ERROR: bad deflate\n) if scalar @{ $list } != $len - $diff;
+ die qq(ERROR: Dad deflate\n) if scalar @{ $list } != $len - $diff;
}
my ( $fh, $line, @fields, @align );
- $fh = Maasha::Common::read_open( $path );
+ $fh = Maasha::Filesys::file_read_open( $path );
while ( $line = <$fh> )
{
}
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> WIGGLE FORMAT <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-sub fixedstep_put_entry
-{
- # Martin A. Hansen, April 2008.
-
- # Outputs a block of fixedStep values.
- # Used for outputting wiggle data.
-
- my ( $chr, # chromosome
- $beg, # start position
- $block, # list of scores
- $fh, # filehandle - OPTIONAL
- $log10, # flag indicating that log10 scores should be used
- ) = @_;
-
- # Returns nothing.
-
- $beg += 1; # fixedStep format is 1 based.
-
- $fh ||= \*STDOUT;
-
- print $fh "fixedStep chrom=$chr start=$beg step=1\n";
-
- if ( $log10 ) {
- map { printf( $fh "%f\n", Maasha::Calc::log10( $_ + 1 ) ) } @{ $block };
- } else {
- map { printf( $fh "%d\n", ( $_ + 1 ) ) } @{ $block };
- }
-}
-
-
-sub wiggle_upload_to_ucsc
-{
- # Martin A. Hansen, May 2008.
-
- # Upload a wiggle file to the UCSC database.
-
- my ( $tmp_dir, # temporary directory
- $wib_dir, # wib directory
- $wig_file, # file to upload,
- $options, # argument hashref
- ) = @_;
-
- # Returns nothing.
-
- my ( $args );
-
-# $args = join " ", "-tmpDir=$tmp_dir", "-pathPrefix=$wib_dir", $options->{ "database" }, $options->{ 'table' }, $wig_file;
-
-# Maasha::Common::run( "hgLoadWiggle", "$args > /dev/null 2>&1" );
-
- if ( $options->{ 'verbose' } ) {
- `cd $tmp_dir && hgLoadWiggle -tmpDir=$tmp_dir -pathPrefix=$wib_dir $options->{ 'database' } $options->{ 'table' } $wig_file`;
- } else {
- `cd $tmp_dir && hgLoadWiggle -tmpDir=$tmp_dir -pathPrefix=$wib_dir $options->{ 'database' } $options->{ 'table' } $wig_file > /dev/null 2>&1`;
- }
-}
-
-
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> MySQL CONF <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
# Returns a string.
- my ( $fh, $line, $user );
+ my ( $file, $fh, $line, $user );
+
+ $file = "$ENV{ 'HOME' }/.hg.conf";
- $fh = Maasha::Common::read_open( "$ENV{ 'HOME' }/.hg.conf" );
+ return if not -f $file;
+
+ $fh = Maasha::Filesys::file_read_open( $file );
while ( $line = <$fh> )
{
# Returns a string.
- my ( $fh, $line, $password );
+ my ( $file, $fh, $line, $password );
+
+ $file = "$ENV{ 'HOME' }/.hg.conf";
+
+ return if not -f $file;
- $fh = Maasha::Common::read_open( "$ENV{ 'HOME' }/.hg.conf" );
+ $fh = Maasha::Filesys::file_read_open( $file );
while ( $line = <$fh> )
{