# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+use warnings;
use strict;
use vars qw ( @ISA @EXPORT );
use Data::Dumper;
-use Time::HiRes qw( gettimeofday );
-
use Maasha::Common;
+use Maasha::Filesys;
use Maasha::Calc;
use Maasha::Matrix;
use constant {
- CHR_BEG => 0,
- NEXT_CHR_BEG => 1,
+ FS_CHR_BEG => 0,
+ FS_NEXT_CHR_BEG => 1,
+ FS_CHR_END => 2,
+ FS_INDEX_BEG => 3,
+ FS_INDEX_LEN => 4,
+
+ CHR => 0,
+ CHR_BEG => 1,
CHR_END => 2,
- INDEX_BEG => 3,
- INDEX_LEN => 4,
+ Q_ID => 3,
+ SCORE => 4,
+ STRAND => 5,
+ THICK_BEG => 6,
+ THICK_END => 7,
+ COLOR => 8,
+ BLOCK_COUNT => 9,
+ BLOCK_LENS => 10,
+ Q_BEGS => 11,
};
@ISA = qw( Exporter );
-my $TIME = gettimeofday();
-
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> BED format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
# http://genome.ucsc.edu/goldenPath/help/hgTracksHelp.html#BED
+sub bed_entry_get_array
+{
+ # Martin A. Hansen, September 2008.
+
+ # Reads a BED entry given a filehandle.
+
+ # This is a new _faster_ BED entry parser that
+ # uses arrays and not hashrefs.
+
+ # IMPORTANT! This function does not correct the
+ # BED_END position that is kept the way UCSC
+ # does.
+
+ my ( $fh, # file handle
+ $cols, # columns to read - OPTIONAL (3,4,5,6, or 12)
+ ) = @_;
+
+ # Returns a list.
+
+ my ( $line, @entry );
+
+ $line = <$fh>;
+
+ $line =~ tr/\n\r//d; # some people have carriage returns in their BED files -> Grrrr
+
+ return if not defined $line;
+
+ if ( not defined $cols ) {
+ $cols = 1 + $line =~ tr/\t//;
+ }
+
+ @entry = split "\t", $line, $cols + 1;
+
+ pop @entry if scalar @entry > $cols;
+
+ return wantarray ? @entry : \@entry;
+}
+
+
sub bed_get_entry
{
# Martin A. Hansen, December 2007.
$line = <$fh>;
- $line =~ s/(\n|\r)$//g; # some people have carriage returns in their BED files -> Grrrr
+ $line =~ tr/\n\r//d; # some people have carriage returns in their BED files -> Grrrr
return if not defined $line;
elsif ( $columns == 12 )
{
%entry = (
- "CHR" => $fields[ 0 ],
- "CHR_BEG" => $fields[ 1 ],
- "CHR_END" => $fields[ 2 ] - 1,
- "Q_ID" => $fields[ 3 ],
- "SCORE" => $fields[ 4 ],
- "STRAND" => $fields[ 5 ],
- "THICK_BEG" => $fields[ 6 ],
- "THICK_END" => $fields[ 7 ] - 1,
- "ITEMRGB" => $fields[ 8 ],
- "BLOCKCOUNT" => $fields[ 9 ],
- "BLOCKSIZES" => $fields[ 10 ],
- "Q_BEGS" => $fields[ 11 ],
+ "CHR" => $fields[ 0 ],
+ "CHR_BEG" => $fields[ 1 ],
+ "CHR_END" => $fields[ 2 ] - 1,
+ "Q_ID" => $fields[ 3 ],
+ "SCORE" => $fields[ 4 ],
+ "STRAND" => $fields[ 5 ],
+ "THICK_BEG" => $fields[ 6 ],
+ "THICK_END" => $fields[ 7 ] - 1,
+ "COLOR" => $fields[ 8 ],
+ "BLOCK_COUNT" => $fields[ 9 ],
+ "BLOCK_LENS" => $fields[ 10 ],
+ "Q_BEGS" => $fields[ 11 ],
);
}
else
}
+sub bed_entry_put_array
+{
+ # Martin A. Hansen, Septermber 2008.
+
+ # Writes a BED entry array to file.
+
+ # IMPORTANT! This function does not correct the
+ # BED_END position that is assumed to be in the
+ # UCSC positions scheme.
+
+ my ( $record, # list
+ $fh, # file handle - OPTIONAL
+ $cols, # number of columns in BED file - OPTIONAL
+ ) = @_;
+
+ # Returns nothing.
+
+ $fh = \*STDOUT if not defined $fh;
+
+ if ( defined $cols ) {
+ print $fh join( "\t", @{ $record }[ 0 .. $cols - 1 ] ), "\n";
+ } else {
+ print $fh join( "\t", @{ $record } ), "\n";
+ }
+}
+
+
sub bed_put_entry
{
# Martin A. Hansen, Septermber 2007.
push @fields, $record->{ "STRAND" };
push @fields, $record->{ "THICK_BEG" } if defined $record->{ "THICK_BEG" };
push @fields, $record->{ "THICK_END" } + 1 if defined $record->{ "THICK_END" };
- push @fields, $record->{ "ITEMRGB" } if defined $record->{ "ITEMRGB" };
- push @fields, $record->{ "BLOCKCOUNT" } if defined $record->{ "BLOCKCOUNT" };
- push @fields, $record->{ "BLOCKSIZES" } if defined $record->{ "BLOCKSIZES" };
+ push @fields, $record->{ "COLOR" } if defined $record->{ "COLOR" };
+ push @fields, $record->{ "BLOCK_COUNT" } if defined $record->{ "BLOCK_COUNT" };
+ push @fields, $record->{ "BLOCK_LENS" } if defined $record->{ "BLOCK_LENS" };
push @fields, $record->{ "Q_BEGS" } if defined $record->{ "Q_BEGS" };
}
$intron_max = 0;
$intron_min = 9999999999;
- $entry->{ "EXONS" } = $entry->{ "BLOCKCOUNT" };
+ $entry->{ "EXONS" } = $entry->{ "BLOCK_COUNT" };
@begs = split /,/, $entry->{ "Q_BEGS" };
- @lens = split /,/, $entry->{ "BLOCKSIZES" };
+ @lens = split /,/, $entry->{ "BLOCK_LENS" };
- for ( $i = 0; $i < $entry->{ "BLOCKCOUNT" }; $i++ )
+ for ( $i = 0; $i < $entry->{ "BLOCK_COUNT" }; $i++ )
{
- $exon_len = @lens[ $i ];
+ $exon_len = $lens[ $i ];
$entry->{ "EXON_LEN_$i" } = $exon_len;
$entry->{ "EXON_MIN_LEN" } = $exon_min;
$entry->{ "EXON_MEAN_LEN" } = int( $exon_tot / $entry->{ "EXONS" } );
- $entry->{ "INTRONS" } = $entry->{ "BLOCKCOUNT" } - 1;
+ $entry->{ "INTRONS" } = $entry->{ "BLOCK_COUNT" } - 1;
$entry->{ "INTRONS" } = 0 if $entry->{ "INTRONS" } < 0;
if ( $entry->{ "INTRONS" } )
{
- for ( $i = 1; $i < $entry->{ "BLOCKCOUNT" }; $i++ )
+ for ( $i = 1; $i < $entry->{ "BLOCK_COUNT" }; $i++ )
{
- $intron_len = @begs[ $i ] - ( @begs[ $i - 1 ] + @lens[ $i - 1 ] );
+ $intron_len = $begs[ $i ] - ( $begs[ $i - 1 ] + $lens[ $i - 1 ] );
$entry->{ "INTRON_LEN_" . ( $i - 1 ) } = $intron_len;
sub bed_sort
{
- # Martin A. Hansen, March 2008.
+ # Martin A. Hansen, Septermber 2008
- # Sort a potential huge BED file according to
- # CHR, CHR_BEG and optionally STRAND.
+ # Sorts a BED file using the c program
+ # "bed_sort" specifing a sort mode:
- my ( $tmp_dir, # temporary directory used for sorting
- $file, # BED file to sort
- $strand, # flag to sort on strand - OPTIONAL
- ) = @_;
+ # 1: chr AND chr_beg.
+ # 2: chr AND strand AND chr_beg.
+ # 3: chr_beg.
+ # 4: strand AND chr_beg.
- # Returns nothing.
+ my ( $bed_file, # BED file to sort
+ $sort_mode, # See above.
+ $cols, # Number of columns in BED file
+ ) = @_;
- my ( $fh_in, $key, $fh_out, %fh_hash, $part_file, $entry, $entries );
+ &Maasha::Common::run( "bed_sort", "--sort $sort_mode --cols $cols $bed_file" );
+}
- $fh_in = Maasha::Common::read_open( $file );
- while ( $entry = bed_get_entry( $fh_in ) )
- {
- if ( $strand ) {
- $key = join "_", $entry->{ "CHR" }, $entry->{ "STRAND" };
- } else {
- $key = $entry->{ "CHR" };
- }
+sub bed_split_to_files
+{
+ # Martin A. Hansen, Septermber 2008
- $fh_hash{ $key } = Maasha::Common::write_open( "$tmp_dir/$key.sort" ) if not exists $fh_hash{ $key };
-
- bed_put_entry( $entry, $fh_hash{ $key } );
- }
+ # Given a list of BED files, split these
+ # into temporary files based on the chromosome
+ # name. Returns a list of the temporary files.
- close $fh_in;
+ my ( $bed_files, # list of BED files to split
+ $cols, # number of columns
+ $tmp_dir, # temporary directory
+ ) = @_;
- map { close $_ } keys %fh_hash;
+ # Returns a list.
- $fh_out = Maasha::Common::write_open( "$tmp_dir/temp.sort" );
+ my ( $bed_file, $fh_in, $entry, $key, %fh_hash, @tmp_files );
- foreach $part_file ( sort keys %fh_hash )
+ foreach $bed_file ( @{ $bed_files } )
{
- $entries = bed_get_entries( "$tmp_dir/$part_file.sort" );
+ $fh_in = Maasha::Common::read_open( $bed_file );
- @{ $entries } = sort { $a->{ "CHR_BEG" } <=> $b->{ "CHR_BEG" } } @{ $entries };
-
- map { bed_put_entry( $_, $fh_out ) } @{ $entries };
+ while ( $entry = bed_entry_get_array( $fh_in, $cols ) )
+ {
+ $key = $entry->[ CHR ];
- unlink "$tmp_dir/$part_file.sort";
+ $fh_hash{ $key } = Maasha::Common::write_open( "$tmp_dir/$key.temp" ) if not exists $fh_hash{ $key };
+
+ bed_entry_put_array( $entry, $fh_hash{ $key } );
+ }
+
+ close $fh_in;
}
- close $fh_out;
+ foreach $key ( sort keys %fh_hash )
+ {
+ push @tmp_files, "$tmp_dir/$key.temp";
+
+ close $fh_hash{ $key };
+ }
- rename "$tmp_dir/temp.sort", $file;
+ return wantarray ? @tmp_files : \@tmp_files;
}
if ( exists $entries->[ $i ]->{ "Q_BEGS" } )
{
@q_begs = split ",", $entries->[ $i ]->{ "Q_BEGS" };
- @blocksizes = split ",", $entries->[ $i ]->{ "BLOCKSIZES" };
+ @blocksizes = split ",", $entries->[ $i ]->{ "BLOCK_LENS" };
}
else
{
map { $_ -= $entries->[ 0 ]->{ "CHR_BEG" } } @new_q_begs;
%new_entry = (
- CHR => $entries->[ 0 ]->{ "CHR" },
- CHR_BEG => $entries->[ 0 ]->{ "CHR_BEG" },
- CHR_END => $entries->[ -1 ]->{ "CHR_END" },
- REC_TYPE => "BED",
- BED_LEN => $entries->[ -1 ]->{ "CHR_END" } - $entries->[ 0 ]->{ "CHR_BEG" } + 1,
- BED_COLS => 12,
- Q_ID => join( ":", @q_ids ),
- SCORE => 999,
- STRAND => $entries->[ 0 ]->{ "STRAND" } || "+",
- THICK_BEG => $entries->[ 0 ]->{ "THICK_BEG" } || $entries->[ 0 ]->{ "CHR_BEG" },
- THICK_END => $entries->[ -1 ]->{ "THICK_END" } || $entries->[ -1 ]->{ "CHR_END" },
- ITEMRGB => "0,0,0",
- BLOCKCOUNT => scalar @new_q_begs,
- BLOCKSIZES => join( ",", @new_blocksizes ),
- Q_BEGS => join( ",", @new_q_begs ),
+ CHR => $entries->[ 0 ]->{ "CHR" },
+ CHR_BEG => $entries->[ 0 ]->{ "CHR_BEG" },
+ CHR_END => $entries->[ -1 ]->{ "CHR_END" },
+ REC_TYPE => "BED",
+ BED_LEN => $entries->[ -1 ]->{ "CHR_END" } - $entries->[ 0 ]->{ "CHR_BEG" } + 1,
+ BED_COLS => 12,
+ Q_ID => join( ":", @q_ids ),
+ SCORE => 999,
+ STRAND => $entries->[ 0 ]->{ "STRAND" } || "+",
+ THICK_BEG => $entries->[ 0 ]->{ "THICK_BEG" } || $entries->[ 0 ]->{ "CHR_BEG" },
+ THICK_END => $entries->[ -1 ]->{ "THICK_END" } || $entries->[ -1 ]->{ "CHR_END" },
+ COLOR => 0,
+ BLOCK_COUNT => scalar @new_q_begs,
+ BLOCK_LENS => join( ",", @new_blocksizes ),
+ Q_BEGS => join( ",", @new_q_begs ),
);
return wantarray ? %new_entry : \%new_entry;
my ( @q_begs, @blocksizes, $block, @blocks, $i );
- if ( exists $entry->{ "BLOCKCOUNT" } )
+ if ( exists $entry->{ "BLOCK_COUNT" } )
{
@q_begs = split ",", $entry->{ "Q_BEGS" };
- @blocksizes = split ",", $entry->{ "BLOCKSIZES" };
+ @blocksizes = split ",", $entry->{ "BLOCK_LENS" };
for ( $i = 0; $i < @q_begs; $i++ )
{
$args = join " ", $options->{ "database" }, $options->{ "table" }, "-tmpDir=$tmp_dir", $file;
}
- if ( $options->{ "sec_struct" } )
+ if ( $options->{ "table" } =~ /rnaSecStr/ )
{
$table = $options->{ "table" };
- Maasha::Common::error( "Attempt to load secondary structure track without 'rnaSecStr' in table name" ) if not $table =~ /rnaSecStr/;
+ print qq(uploading secondary structure table:"$table"\n) if $options->{ "verbose" };
$sql_file = "$tmp_dir/upload_RNA_SS.sql";
}
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PSL format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-sub psl_get_entries
+sub psl_upload_to_ucsc
{
- # Martin A. Hansen, February 2008.
+ # Martin A. Hansen, September 2007.
- # Reads PSL entries and returns a record.
+ # Upload a PSL file to the UCSC database.
- my ( $path, # full path to PSL file
+ my ( $file, # file to upload,
+ $options, # argument hashref
+ $append, # flag indicating table should be appended
) = @_;
- # Returns hashref.
-
- my ( $fh, @lines, @fields, $i, %record, @records );
-
- $fh = Maasha::Common::read_open( $path );
-
- @lines = <$fh>;
-
- close $fh;
-
- chomp @lines;
+ # Returns nothing.
- for ( $i = 5; $i < @lines; $i++ )
- {
- @fields = split "\t", $lines[ $i ];
-
- Maasha::Common::error( qq(Bad PSL format in file "$path") ) if not @fields == 21;
-
- undef %record;
-
- %record = (
- REC_TYPE => "PSL",
- MATCHES => $fields[ 0 ],
- MISMATCHES => $fields[ 1 ],
- REPMATCHES => $fields[ 2 ],
- NCOUNT => $fields[ 3 ],
- QNUMINSERT => $fields[ 4 ],
- QBASEINSERT => $fields[ 5 ],
- SNUMINSERT => $fields[ 6 ],
- SBASEINSERT => $fields[ 7 ],
- STRAND => $fields[ 8 ],
- Q_ID => $fields[ 9 ],
- Q_LEN => $fields[ 10 ],
- Q_BEG => $fields[ 11 ],
- Q_END => $fields[ 12 ] - 1,
- S_ID => $fields[ 13 ],
- S_LEN => $fields[ 14 ],
- S_BEG => $fields[ 15 ],
- S_END => $fields[ 16 ] - 1,
- BLOCKCOUNT => $fields[ 17 ],
- BLOCKSIZES => $fields[ 18 ],
- Q_BEGS => $fields[ 19 ],
- S_BEGS => $fields[ 20 ],
- );
+ my ( $args );
- $record{ "SCORE" } = $record{ "MATCHES" } + int( $record{ "REPMATCHES" } / 2 ) - $record{ "MISMATCHES" } - $record{ "QNUMINSERT" } - $record{ "SNUMINSERT" };
-
- push @records, { %record };
+ if ( $append ) {
+ $args = join " ", $options->{ "database" }, "-table=$options->{ 'table' }", "-clientLoad", "-append", $file;
+ } else {
+ $args = join " ", $options->{ "database" }, "-table=$options->{ 'table' }", "-clientLoad", $file;
}
- return wantarray ? @records : \@records;
+ Maasha::Common::run( "hgLoadPsl", "$args > /dev/null 2>&1" );
}
-sub psl_put_header
-{
- # Martin A. Hansen, September 2007.
-
- # Write a PSL header to file.
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> TRACK FILE <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
- my ( $fh, # file handle - OPTIONAL
- ) = @_;
- # Returns nothing.
+sub ucsc_config_entry_get
+{
+ # Martin A. Hansen, November 2008.
- $fh = \*STDOUT if not $fh;
+ # Given a filehandle to a UCSC Genome Browser
+ # config file (.ra) get the next entry and
+ # return as a hash. Entries are separated by blank
+ # lines. # lines are skipped unless it is the lines:
+ # # Track added ...
+ # # Database ...
- print $fh qq(psLayout version 3
-match mis- rep. N's Q gap Q gap T gap T gap strand Q Q Q Q T T T T block blockSizes qStart match match count bases count bases name size start end name size start end count
----------------------------------------------------------------------------------------------------------------------------------------------------------------
-);
-}
+ my ( $fh, # file hanlde
+ ) = @_;
+ # Returns a hashref.
-sub psl_put_entry
-{
- # Martin A. Hansen, September 2007.
+ my ( $line, %record );
- # Write a PSL entry to file.
+ while ( $line = <$fh> )
+ {
+ $line =~ tr/\n\r//d;
+
+ if ( $line =~ /Track added by 'upload_to_ucsc' (\S+) (\S+)/) {
+ $record{ 'date' } = $1;
+ $record{ 'time' } = $2;
+ } elsif ( $line =~ /^# date: (.+)/ ) {
+ $record{ 'date' } = $1;
+ } elsif ( $line =~ /^# time: (.+)/ ) {
+ $record{ 'time' } = $1;
+ } elsif ( $line =~ /^# (?:database:|Database) (.+)/ ) {
+ $record{ 'database' } = $1;
+ } elsif ( $line =~ /^#/ ) {
+ # skip
+ } elsif ( $line =~ /(\S+)\s+(.+)/ ) {
+ $record{ $1 } = $2;
+ }
- my ( $record, # hashref
- $fh, # file handle - OPTIONAL
- ) = @_;
+ last if $line =~ /^$/ and exists $record{ "track" };
+ }
- # Returns nothing.
+ return undef if not exists $record{ "track" };
- $fh = \*STDOUT if not $fh;
-
- my @output;
-
- push @output, $record->{ "MATCHES" };
- push @output, $record->{ "MISMATCHES" };
- push @output, $record->{ "REPMATCHES" };
- push @output, $record->{ "NCOUNT" };
- push @output, $record->{ "QNUMINSERT" };
- push @output, $record->{ "QBASEINSERT" };
- push @output, $record->{ "SNUMINSERT" };
- push @output, $record->{ "SBASEINSERT" };
- push @output, $record->{ "STRAND" };
- push @output, $record->{ "Q_ID" };
- push @output, $record->{ "Q_LEN" };
- push @output, $record->{ "Q_BEG" };
- push @output, $record->{ "Q_END" } + 1;
- push @output, $record->{ "S_ID" };
- push @output, $record->{ "S_LEN" };
- push @output, $record->{ "S_BEG" };
- push @output, $record->{ "S_END" } + 1;
- push @output, $record->{ "BLOCKCOUNT" };
- push @output, $record->{ "BLOCKSIZES" };
- push @output, $record->{ "Q_BEGS" };
- push @output, $record->{ "S_BEGS" };
-
- print $fh join( "\t", @output ), "\n";
+ return wantarray ? %record : \%record;
}
-sub psl_upload_to_ucsc
+sub ucsc_config_entry_put
{
- # Martin A. Hansen, September 2007.
+ # Martin A. Hansen, November 2008.
- # Upload a PSL file to the UCSC database.
+ # Outputs a Biopiece record (a hashref)
+ # to a filehandle or STDOUT.
- my ( $file, # file to upload,
- $options, # argument hashref
- $append, # flag indicating table should be appended
+ my ( $record, # hashref
+ $fh_out, # file handle
) = @_;
# Returns nothing.
- my ( $args );
-
- if ( $append ) {
- $args = join " ", $options->{ "database" }, "-table=$options->{ 'table' }", "-clientLoad", "-append", $file;
- } else {
- $args = join " ", $options->{ "database" }, "-table=$options->{ 'table' }", "-clientLoad", $file;
- }
-
- Maasha::Common::run( "hgLoadPsl", "$args > /dev/null 2>&1" );
+ my ( $date, $time );
+
+ $fh_out ||= \*STDOUT;
+
+ ( $date, $time ) = split " ", Maasha::Common::time_stamp();
+
+ $record->{ "date" } ||= $date;
+ $record->{ "time" } ||= $time;
+
+ print $fh_out "\n# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n";
+
+ map { print $fh_out "# $_: $record->{ $_ }\n" if exists $record->{ $_ } } qw( date time database );
+ map { print $fh_out "$_ $record->{ $_ }\n" if exists $record->{ $_ } } qw( track
+ name
+ description
+ itemRgb
+ db
+ offset
+ url
+ htmlUrl
+ shortLabel
+ longLabel
+ group
+ priority
+ useScore
+ visibility
+ maxHeightPixels
+ color
+ mafTrack
+ type );
}
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> TRACK FILE <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-sub update_my_tracks
+sub ucsc_update_config
{
# Martin A. Hansen, September 2007.
# Returns nothing.
- my ( $file, $fh_in, $fh_out, $line, $time );
+ my ( $file, $entry, %new_entry, $fh_in, $fh_out, $was_found );
$file = $ENV{ "HOME" } . "/ucsc/my_tracks.ra";
- # ---- create a backup ----
-
- $fh_in = Maasha::Common::read_open( $file );
- $fh_out = Maasha::Common::write_open( "$file~" );
-
- while ( $line = <$fh_in> ) {
- print $fh_out $line;
- }
-
- close $fh_in;
- close $fh_out;
-
- # ---- append track ----
-
- $time = Maasha::Common::time_stamp();
-
- $fh_out = Maasha::Common::append_open( $file );
-
- if ( $type eq "sec_struct" )
- {
- print $fh_out "\n\n# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n";
+ Maasha::Filesys::file_copy( $file, "$file~" ); # create a backup
- print $fh_out "\n# Track added by 'upload_to_ucsc' $time\n\n";
+ %new_entry = (
+ database => $options->{ 'database' },
+ track => $options->{ 'table' },
+ shortLabel => $options->{ 'short_label' },
+ longLabel => $options->{ 'long_label' },
+ group => $options->{ 'group' },
+ priority => $options->{ 'priority' },
+ visibility => $options->{ 'visibility' },
+ color => $options->{ 'color' },
+ type => $type,
+ );
- print $fh_out "# Database $options->{ 'database' }\n\n";
+ $new_entry{ 'useScore' } = 1 if $options->{ 'use_score' };
+ $new_entry{ 'mafTrack' } = "multiz17way" if $type eq "type bed 6 +";
+ $new_entry{ 'maxHeightPixels' } = "50:50:11" if $type eq "wig 0";
- print $fh_out "track $options->{ 'table' }\n";
- print $fh_out "shortLabel $options->{ 'short_label' }\n";
- print $fh_out "longLabel $options->{ 'long_label' }\n";
- print $fh_out "group $options->{ 'group' }\n";
- print $fh_out "priority $options->{ 'priority' }\n";
- print $fh_out "visibility $options->{ 'visibility' }\n";
- print $fh_out "color $options->{ 'color' }\n";
- print $fh_out "type bed 6 +\n";
- print $fh_out "mafTrack multiz17way\n";
+ $fh_in = Maasha::Filesys::file_read_open( "$file~" );
+ $fh_out = Maasha::Filesys::file_write_open( $file );
- print $fh_out "\n# //\n";
- }
- else
+ while ( $entry = ucsc_config_entry_get( $fh_in ) )
{
- print $fh_out "\n\n# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n";
-
- print $fh_out "\n# Track added by 'upload_to_ucsc' $time\n\n";
-
- print $fh_out "# Database $options->{ 'database' }\n\n";
-
- print $fh_out "track $options->{ 'table' }\n";
- print $fh_out "shortLabel $options->{ 'short_label' }\n";
- print $fh_out "longLabel $options->{ 'long_label' }\n";
- print $fh_out "group $options->{ 'group' }\n";
- print $fh_out "priority $options->{ 'priority' }\n";
- print $fh_out "useScore 1\n" if $options->{ 'use_score' };
- print $fh_out "visibility $options->{ 'visibility' }\n";
- print $fh_out "maxHeightPixels 50:50:11\n" if $type eq "wig 0";
- print $fh_out "color $options->{ 'color' }\n";
- print $fh_out "type $type\n";
+ if ( $entry->{ 'database' } eq $new_entry{ 'database' } and $entry->{ 'track' } eq $new_entry{ 'track' } )
+ {
+ ucsc_config_entry_put( \%new_entry, $fh_out );
- print $fh_out "\n# //\n";
+ $was_found = 1;
+ }
+ else
+ {
+ ucsc_config_entry_put( $entry, $fh_out );
+ }
}
+ ucsc_config_entry_put( \%new_entry, $fh_out ) if not $was_found;
+
+ close $fh_in;
close $fh_out;
Maasha::Common::run( "ucscMakeTracks.pl", "-b > /dev/null 2>&1" );
}
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PhastCons format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> fixedStep format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-sub phastcons_get_entry
+sub fixedstep_get_entry
{
# Martin A. Hansen, December 2007.
}
-sub phastcons_parse_entry
-{
- # Martin A. Hansen, December 2007.
-
- # Given a PhastCons entry converts this to a
- # list of super blocks.
-
- my ( $lines, # list of lines
- $args, # argument hash
- ) = @_;
-
- # Returns
-
- my ( $info, $chr, $beg, $step, $i, $c, $j, @blocks, @super_blocks, @entries, $super_block, $block, @lens, @begs );
-
- $info = shift @{ $lines };
-
- if ( $info =~ /^chrom=([^ ]+) start=(\d+) step=(\d+)$/ )
- {
- $chr = $1;
- $beg = $2;
- $step = $3;
-
- die qq(ERROR: step size $step != 1 -> problem!\n) if $step != 1; # in an ideal world should would be fixed ...
- }
-
- $i = 0;
-
- while ( $i < @{ $lines } )
- {
- if ( $lines->[ $i ] >= $args->{ "threshold" } )
- {
- $c = $i + 1;
-
- while ( $c < @{ $lines } )
- {
- if ( $lines->[ $c ] < $args->{ "threshold" } )
- {
- $j = $c + 1;
-
- while ( $j < @{ $lines } and $lines->[ $j ] < $args->{ "threshold" } ) {
- $j++;
- }
-
- if ( $j - $c > $args->{ "gap" } )
- {
- if ( $c - $i >= $args->{ "min" } )
- {
- push @blocks, {
- CHR => $chr,
- CHR_BEG => $beg + $i - 1,
- CHR_END => $beg + $c - 2,
- CHR_LEN => $c - $i,
- };
- }
-
- $i = $j;
-
- last;
- }
-
- $c = $j
- }
- else
- {
- $c++;
- }
- }
-
- if ( $c - $i >= $args->{ "min" } )
- {
- push @blocks, {
- CHR => $chr,
- CHR_BEG => $beg + $i - 1,
- CHR_END => $beg + $c - 2,
- CHR_LEN => $c - $i,
- };
- }
-
- $i = $c;
- }
- else
- {
- $i++;
- }
- }
-
- $i = 0;
-
- while ( $i < @blocks )
- {
- $c = $i + 1;
-
- while ( $c < @blocks and $blocks[ $c ]->{ "CHR_BEG" } - $blocks[ $c - 1 ]->{ "CHR_END" } <= $args->{ "dist" } )
- {
- $c++;
- }
-
- push @super_blocks, [ @blocks[ $i .. $c - 1 ] ];
-
- $i = $c;
- }
-
- foreach $super_block ( @super_blocks )
- {
- foreach $block ( @{ $super_block } )
- {
- push @begs, $block->{ "CHR_BEG" } - $super_block->[ 0 ]->{ "CHR_BEG" };
- push @lens, $block->{ "CHR_LEN" } - 1;
- }
-
- $lens[ -1 ]++;
-
- push @entries, {
- CHR => $super_block->[ 0 ]->{ "CHR" },
- CHR_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
- CHR_END => $super_block->[ -1 ]->{ "CHR_END" },
- Q_ID => "Q_ID",
- SCORE => 100,
- STRAND => "+",
- THICK_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
- THICK_END => $super_block->[ -1 ]->{ "CHR_END" } + 1,
- ITEMRGB => "0,200,100",
- BLOCKCOUNT => scalar @{ $super_block },
- BLOCKSIZES => join( ",", @lens ),
- Q_BEGS => join( ",", @begs ),
- };
-
- undef @begs;
- undef @lens;
- }
-
- return wantarray ? @entries : \@entries;
-}
-
-
-sub phastcons_index_create
+sub fixedstep_index_create
{
# Martin A. Hansen, January 2008.
- # Indexes a concatenated PhastCons file.
+ # Indexes a concatenated fixedStep file.
# The index consists of a hash with chromosomes as keys,
# and a list of [ chr_beg, next_chr_beg, chr_end, index_beg, index_len ] as values.
- my ( $path, # path to PhastCons file
+ my ( $path, # path to fixedStep file
) = @_;
# Returns a hashref
$pos = 0;
- while ( $entry = Maasha::UCSC::phastcons_get_entry( $fh ) )
+ while ( $entry = Maasha::UCSC::fixedstep_get_entry( $fh ) )
{
$locator = shift @{ $entry };
if ( $locator =~ /chrom=([^ ]+) start=(\d+) step=(\d+)/ )
{
$chr = $1;
- $beg = $2 - 1; # phastcons files are 1-based
+ $beg = $2 - 1; # fixedStep files are 1-based
$step = $3;
}
else
{
- Maasha::Common::error( qq(Could not parse PhastCons locator: $locator) );
+ Maasha::Common::error( qq(Could not parse locator: $locator) );
}
$pos += length( $locator ) + 11;
foreach $chr ( keys %index )
{
for ( $i = 0; $i < @{ $index{ $chr } } - 1; $i++ ) {
- $index{ $chr }->[ $i ]->[ NEXT_CHR_BEG ] = $index{ $chr }->[ $i + 1 ]->[ 0 ];
+ $index{ $chr }->[ $i ]->[ FS_NEXT_CHR_BEG ] = $index{ $chr }->[ $i + 1 ]->[ 0 ];
}
- $index{ $chr }->[ -1 ]->[ NEXT_CHR_BEG ] = $index{ $chr }->[ -1 ]->[ CHR_END ] + 1;
+ $index{ $chr }->[ -1 ]->[ FS_NEXT_CHR_BEG ] = $index{ $chr }->[ -1 ]->[ FS_CHR_END ] + 1;
}
return wantarray ? %index : \%index;
}
-sub phastcons_index_store
+sub fixedstep_index_store
{
# Martin A. Hansen, January 2008.
- # Writes a PhastCons index to binary file.
+ # Writes a fixedStep index to binary file.
my ( $path, # full path to file
$index, # list with index
}
-sub phastcons_index_retrieve
+sub fixedstep_index_retrieve
{
# Martin A. Hansen, January 2008.
- # Retrieves a PhastCons index from binary file.
+ # Retrieves a fixedStep index from binary file.
my ( $path, # full path to file
) = @_;
}
-sub phastcons_index_lookup
+sub fixedstep_index_lookup
{
# Martin A. Hansen, January 2008.
- # Retrieve PhastCons scores from a indexed
- # Phastcons file given a chromosome and
+ # Retrieve fixedStep scores from a indexed
+ # fixedStep file given a chromosome and
# begin and end positions.
my ( $index, # data structure
$chr_beg -= $flank;
$chr_end += $flank;
-# print "chr_beg->$chr_beg chr_end->$chr_end flank->$flank\n";
+ # print "chr_beg->$chr_beg chr_end->$chr_end flank->$flank\n";
if ( exists $index->{ $chr } )
{
if ( $index_beg == $index_end )
{
- $beg = Maasha::Calc::max( $chr_beg, $index->{ $chr }->[ $index_beg ]->[ CHR_BEG ] );
- $end = Maasha::Calc::min( $chr_end, $index->{ $chr }->[ $index_end ]->[ CHR_END ] );
+ $beg = Maasha::Calc::max( $chr_beg, $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] );
+ $end = Maasha::Calc::min( $chr_end, $index->{ $chr }->[ $index_end ]->[ FS_CHR_END ] );
- if ( $beg <= $index->{ $chr }->[ $index_beg ]->[ CHR_END ] and $end >= $index->{ $chr }->[ $index_beg ]->[ CHR_BEG ] )
+ if ( $beg <= $index->{ $chr }->[ $index_beg ]->[ FS_CHR_END ] and $end >= $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] )
{
@vals = split "\n", Maasha::Common::file_read(
$fh,
- $index->{ $chr }->[ $index_beg ]->[ INDEX_BEG ] + 6 * ( $beg - $index->{ $chr }->[ $index_beg ]->[ CHR_BEG ] ),
+ $index->{ $chr }->[ $index_beg ]->[ FS_INDEX_BEG ] + 6 * ( $beg - $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] ),
6 * ( $end - $beg + 1 ),
);
}
}
else
{
- $beg = Maasha::Calc::max( $chr_beg, $index->{ $chr }->[ $index_beg ]->[ CHR_BEG ] );
+ $beg = Maasha::Calc::max( $chr_beg, $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] );
# print Dumper( $beg, $index->{ $chr }->[ $index_beg ] );
-# print Dumper( "next", $index->{ $chr }->[ $index_beg ]->[ NEXT_CHR_BEG ] );
+# print Dumper( "next", $index->{ $chr }->[ $index_beg ]->[ FS_NEXT_CHR_BEG ] );
# beg next
# v v
# |||||||||.......
- if ( $beg <= $index->{ $chr }->[ $index_beg ]->[ CHR_END ] )
+ if ( $beg <= $index->{ $chr }->[ $index_beg ]->[ FS_CHR_END ] )
{
@vals = split "\n", Maasha::Common::file_read(
$fh,
- $index->{ $chr }->[ $index_beg ]->[ INDEX_BEG ] + 6 * ( $beg - $index->{ $chr }->[ $index_beg ]->[ CHR_BEG ] ),
- 6 * ( $index->{ $chr }->[ $index_beg ]->[ CHR_END ] - $beg + 1 ),
+ $index->{ $chr }->[ $index_beg ]->[ FS_INDEX_BEG ] + 6 * ( $beg - $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] ),
+ 6 * ( $index->{ $chr }->[ $index_beg ]->[ FS_CHR_END ] - $beg + 1 ),
);
for ( $c = 0; $c < @vals; $c++ ) {
}
}
- $end = Maasha::Calc::min( $chr_end, $index->{ $chr }->[ $index_end ]->[ CHR_END ] );
+ $end = Maasha::Calc::min( $chr_end, $index->{ $chr }->[ $index_end ]->[ FS_CHR_END ] );
- if ( $end <= $index->{ $chr }->[ $index_end ]->[ CHR_END ] )
+ if ( $end <= $index->{ $chr }->[ $index_end ]->[ FS_CHR_END ] )
{
@vals = split "\n", Maasha::Common::file_read(
$fh,
- $index->{ $chr }->[ $index_end ]->[ INDEX_BEG ],
- 6 * ( $end - $index->{ $chr }->[ $index_end ]->[ CHR_BEG ] + 1 ),
+ $index->{ $chr }->[ $index_end ]->[ FS_INDEX_BEG ],
+ 6 * ( $end - $index->{ $chr }->[ $index_end ]->[ FS_CHR_BEG ] + 1 ),
);
for ( $c = 0; $c < @vals; $c++ ) {
- $scores->[ $c + $index->{ $chr }->[ $index_end ]->[ CHR_BEG ] - $chr_beg ] = $vals[ $c ];
+ $scores->[ $c + $index->{ $chr }->[ $index_end ]->[ FS_CHR_BEG ] - $chr_beg ] = $vals[ $c ];
}
}
{
@vals = split "\n", Maasha::Common::file_read(
$fh,
- $index->{ $chr }->[ $i ]->[ INDEX_BEG ],
- 6 * ( $index->{ $chr }->[ $i ]->[ CHR_END ] - $index->{ $chr }->[ $i ]->[ CHR_BEG ] + 1 ),
+ $index->{ $chr }->[ $i ]->[ FS_INDEX_BEG ],
+ 6 * ( $index->{ $chr }->[ $i ]->[ FS_CHR_END ] - $index->{ $chr }->[ $i ]->[ FS_CHR_BEG ] + 1 ),
);
for ( $c = 0; $c < @vals; $c++ ) {
- $scores->[ $c + $index->{ $chr }->[ $i ]->[ CHR_BEG ] - $chr_beg ] = $vals[ $c ];
+ $scores->[ $c + $index->{ $chr }->[ $i ]->[ FS_CHR_BEG ] - $chr_beg ] = $vals[ $c ];
}
}
}
}
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PhastCons format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub phastcons_index
+{
+ # Martin A. Hansen, July 2008
+
+ # Create a fixedStep index for PhastCons data.
+
+ my ( $file, # file to index
+ $dir, # dir with file
+ ) = @_;
+
+ # Returns nothing.
+
+ my ( $index );
+
+ $index = fixedstep_index_create( "$dir/$file" );
+
+ fixedstep_index_store( "$dir/$file.index", $index );
+}
+
+
+sub phastcons_parse_entry
+{
+ # Martin A. Hansen, December 2007.
+
+ # Given a PhastCons entry converts this to a
+ # list of super blocks.
+
+# $options->{ "min" } ||= 10;
+# $options->{ "dist" } ||= 25;
+# $options->{ "threshold" } ||= 0.8;
+# $options->{ "gap" } ||= 5;
+
+ my ( $lines, # list of lines
+ $args, # argument hash
+ ) = @_;
+
+ # Returns
+
+ my ( $info, $chr, $beg, $step, $i, $c, $j, @blocks, @super_blocks, @entries, $super_block, $block, @lens, @begs );
+
+ $info = shift @{ $lines };
+
+ if ( $info =~ /^chrom=([^ ]+) start=(\d+) step=(\d+)$/ )
+ {
+ $chr = $1;
+ $beg = $2;
+ $step = $3;
+
+ die qq(ERROR: step size $step != 1 -> problem!\n) if $step != 1; # in an ideal world should would be fixed ...
+ }
+
+ $i = 0;
+
+ while ( $i < @{ $lines } )
+ {
+ if ( $lines->[ $i ] >= $args->{ "threshold" } )
+ {
+ $c = $i + 1;
+
+ while ( $c < @{ $lines } )
+ {
+ if ( $lines->[ $c ] < $args->{ "threshold" } )
+ {
+ $j = $c + 1;
+
+ while ( $j < @{ $lines } and $lines->[ $j ] < $args->{ "threshold" } ) {
+ $j++;
+ }
+
+ if ( $j - $c > $args->{ "gap" } )
+ {
+ if ( $c - $i >= $args->{ "min" } )
+ {
+ push @blocks, {
+ CHR => $chr,
+ CHR_BEG => $beg + $i - 1,
+ CHR_END => $beg + $c - 2,
+ CHR_LEN => $c - $i,
+ };
+ }
+
+ $i = $j;
+
+ last;
+ }
+
+ $c = $j
+ }
+ else
+ {
+ $c++;
+ }
+ }
+
+ if ( $c - $i >= $args->{ "min" } )
+ {
+ push @blocks, {
+ CHR => $chr,
+ CHR_BEG => $beg + $i - 1,
+ CHR_END => $beg + $c - 2,
+ CHR_LEN => $c - $i,
+ };
+ }
+
+ $i = $c;
+ }
+ else
+ {
+ $i++;
+ }
+ }
+
+ $i = 0;
+
+ while ( $i < @blocks )
+ {
+ $c = $i + 1;
+
+ while ( $c < @blocks and $blocks[ $c ]->{ "CHR_BEG" } - $blocks[ $c - 1 ]->{ "CHR_END" } <= $args->{ "dist" } )
+ {
+ $c++;
+ }
+
+ push @super_blocks, [ @blocks[ $i .. $c - 1 ] ];
+
+ $i = $c;
+ }
+
+ foreach $super_block ( @super_blocks )
+ {
+ foreach $block ( @{ $super_block } )
+ {
+ push @begs, $block->{ "CHR_BEG" } - $super_block->[ 0 ]->{ "CHR_BEG" };
+ push @lens, $block->{ "CHR_LEN" } - 1;
+ }
+
+ $lens[ -1 ]++;
+
+ push @entries, {
+ CHR => $super_block->[ 0 ]->{ "CHR" },
+ CHR_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
+ CHR_END => $super_block->[ -1 ]->{ "CHR_END" },
+ Q_ID => "Q_ID",
+ SCORE => 100,
+ STRAND => "+",
+ THICK_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
+ THICK_END => $super_block->[ -1 ]->{ "CHR_END" } + 1,
+ COLOR => 0,
+ BLOCK_COUNT => scalar @{ $super_block },
+ BLOCK_LENS => join( ",", @lens ),
+ Q_BEGS => join( ",", @begs ),
+ };
+
+ undef @begs;
+ undef @lens;
+ }
+
+ return wantarray ? @entries : \@entries;
+}
+
+
sub phastcons_normalize
{
# Martin A. Hansen, January 2008.
# Normalizes a list of lists with PhastCons scores,
# in such a way that each list contains the same number
- # or PhastCons scores.
+ # of PhastCons scores.
my ( $AoA, # AoA with PhastCons scores
) = @_;
# splice @{ $list }, $pos, 0, "X";
}
- die qq(ERROR: bad inflate\n) if scalar @{ $list } != $len + $diff;
+ die qq(ERROR: Bad inflate\n) if scalar @{ $list } != $len + $diff;
}
splice @{ $list }, $pos, 1;
}
- die qq(ERROR: bad deflate\n) if scalar @{ $list } != $len - $diff;
+ die qq(ERROR: Dad deflate\n) if scalar @{ $list } != $len - $diff;
}
$beg, # start position
$block, # list of scores
$fh, # filehandle - OPTIONAL
+ $log10, # flag indicating that log10 scores should be used
) = @_;
# Returns nothing.
$beg += 1; # fixedStep format is 1 based.
- if ( $fh )
- {
- print $fh "fixedStep chrom=$chr start=$beg step=1\n";
+ $fh ||= \*STDOUT;
- map { printf( $fh "%d\n", ( $_ + 1 ) ) } @{ $block };
- }
- else
- {
- print "fixedStep chrom=$chr start=$beg step=1\n";
+ print $fh "fixedStep chrom=$chr start=$beg step=1\n";
- map { printf( "%d\n", ( $_ + 1 ) ) } @{ $block };
+ if ( $log10 ) {
+ map { printf( $fh "%f\n", Maasha::Calc::log10( $_ + 1 ) ) } @{ $block };
+ } else {
+ map { printf( $fh "%d\n", ( $_ + 1 ) ) } @{ $block };
}
}
# Maasha::Common::run( "hgLoadWiggle", "$args > /dev/null 2>&1" );
- `cd $tmp_dir && hgLoadWiggle -tmpDir=$tmp_dir -pathPrefix=$wib_dir $options->{ 'database' } $options->{ 'table' } $wig_file > /dev/null 2>&1`;
+ if ( $options->{ 'verbose' } ) {
+ `cd $tmp_dir && hgLoadWiggle -tmpDir=$tmp_dir -pathPrefix=$wib_dir $options->{ 'database' } $options->{ 'table' } $wig_file`;
+ } else {
+ `cd $tmp_dir && hgLoadWiggle -tmpDir=$tmp_dir -pathPrefix=$wib_dir $options->{ 'database' } $options->{ 'table' } $wig_file > /dev/null 2>&1`;
+ }
}
# Returns a string.
- my ( $fh, $line, $user );
+ my ( $file, $fh, $line, $user );
+
+ $file = "$ENV{ 'HOME' }/.hg.conf";
- $fh = Maasha::Common::read_open( "$ENV{ 'HOME' }/.hg.conf" );
+ return if not -f $file;
+
+ $fh = Maasha::Common::read_open( $file );
while ( $line = <$fh> )
{
# Returns a string.
- my ( $fh, $line, $password );
+ my ( $file, $fh, $line, $password );
+
+ $file = "$ENV{ 'HOME' }/.hg.conf";
+
+ return if not -f $file;
- $fh = Maasha::Common::read_open( "$ENV{ 'HOME' }/.hg.conf" );
+ $fh = Maasha::Common::read_open( $file );
while ( $line = <$fh> )
{