X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=code_perl%2FMaasha%2FUCSC.pm;h=5c188fd1ffbaa800cf5dd42cb8238297b4c0728d;hb=fe0b08151a523b9cc25b24d858f461b583b7ff69;hp=01cb884c7903a3ea62ee8222a5349d97be1101c4;hpb=b53a2cbe04bf29a5f3face92a9f980511200a5fb;p=biopieces.git diff --git a/code_perl/Maasha/UCSC.pm b/code_perl/Maasha/UCSC.pm index 01cb884..5c188fd 100644 --- a/code_perl/Maasha/UCSC.pm +++ b/code_perl/Maasha/UCSC.pm @@ -28,41 +28,39 @@ package Maasha::UCSC; # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +use warnings; use strict; use vars qw ( @ISA @EXPORT ); use Data::Dumper; -use Time::HiRes qw( gettimeofday ); - use Maasha::Common; +use Maasha::Filesys; use Maasha::Calc; use Maasha::Matrix; use constant { - CHR_BEG => 0, - NEXT_CHR_BEG => 1, + FS_CHR_BEG => 0, + FS_NEXT_CHR_BEG => 1, + FS_CHR_END => 2, + FS_INDEX_BEG => 3, + FS_INDEX_LEN => 4, + + CHR => 0, + CHR_BEG => 1, CHR_END => 2, - INDEX_BEG => 3, - INDEX_LEN => 4, - - CHR => 0, - CHR_BEG => 1, - CHR_END => 2, - Q_ID => 3, - SCORE => 4, - STRAND => 5, - THICK_BEG => 6, - THICK_END => 7, - ITEMRGB => 8, - BLOCKCOUNT => 9, - BLOCKSIZES => 10, - Q_BEGS => 11, + Q_ID => 3, + SCORE => 4, + STRAND => 5, + THICK_BEG => 6, + THICK_END => 7, + COLOR => 8, + BLOCK_COUNT => 9, + BLOCK_LENS => 10, + Q_BEGS => 11, }; @ISA = qw( Exporter ); -my $TIME = gettimeofday(); - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> BED format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @@ -174,18 +172,18 @@ sub bed_get_entry elsif ( $columns == 12 ) { %entry = ( - "CHR" => $fields[ 0 ], - "CHR_BEG" => $fields[ 1 ], - "CHR_END" => $fields[ 2 ] - 1, - "Q_ID" => $fields[ 3 ], - "SCORE" => $fields[ 4 ], - "STRAND" => $fields[ 5 ], - "THICK_BEG" => $fields[ 6 ], - "THICK_END" => $fields[ 7 ] - 1, - "ITEMRGB" => $fields[ 8 ], - "BLOCKCOUNT" => $fields[ 9 ], - "BLOCKSIZES" => $fields[ 10 ], - "Q_BEGS" => $fields[ 11 ], + "CHR" => $fields[ 0 ], + "CHR_BEG" => $fields[ 1 ], + "CHR_END" => $fields[ 2 ] - 1, + "Q_ID" => $fields[ 3 ], + "SCORE" => $fields[ 4 ], + "STRAND" => $fields[ 5 ], + "THICK_BEG" => $fields[ 6 ], + "THICK_END" => $fields[ 7 ] - 1, + "COLOR" => $fields[ 8 ], + "BLOCK_COUNT" => $fields[ 9 ], + "BLOCK_LENS" => $fields[ 10 ], + "Q_BEGS" => $fields[ 11 ], ); } else @@ -325,9 +323,9 @@ sub bed_put_entry push @fields, $record->{ "STRAND" }; push @fields, $record->{ "THICK_BEG" } if defined $record->{ "THICK_BEG" }; push @fields, $record->{ "THICK_END" } + 1 if defined $record->{ "THICK_END" }; - push @fields, $record->{ "ITEMRGB" } if defined $record->{ "ITEMRGB" }; - push @fields, $record->{ "BLOCKCOUNT" } if defined $record->{ "BLOCKCOUNT" }; - push @fields, $record->{ "BLOCKSIZES" } if defined $record->{ "BLOCKSIZES" }; + push @fields, $record->{ "COLOR" } if defined $record->{ "COLOR" }; + push @fields, $record->{ "BLOCK_COUNT" } if defined $record->{ "BLOCK_COUNT" }; + push @fields, $record->{ "BLOCK_LENS" } if defined $record->{ "BLOCK_LENS" }; push @fields, $record->{ "Q_BEGS" } if defined $record->{ "Q_BEGS" }; } @@ -374,14 +372,14 @@ sub bed_analyze $intron_max = 0; $intron_min = 9999999999; - $entry->{ "EXONS" } = $entry->{ "BLOCKCOUNT" }; + $entry->{ "EXONS" } = $entry->{ "BLOCK_COUNT" }; @begs = split /,/, $entry->{ "Q_BEGS" }; - @lens = split /,/, $entry->{ "BLOCKSIZES" }; + @lens = split /,/, $entry->{ "BLOCK_LENS" }; - for ( $i = 0; $i < $entry->{ "BLOCKCOUNT" }; $i++ ) + for ( $i = 0; $i < $entry->{ "BLOCK_COUNT" }; $i++ ) { - $exon_len = @lens[ $i ]; + $exon_len = $lens[ $i ]; $entry->{ "EXON_LEN_$i" } = $exon_len; @@ -396,14 +394,14 @@ sub bed_analyze $entry->{ "EXON_MIN_LEN" } = $exon_min; $entry->{ "EXON_MEAN_LEN" } = int( $exon_tot / $entry->{ "EXONS" } ); - $entry->{ "INTRONS" } = $entry->{ "BLOCKCOUNT" } - 1; + $entry->{ "INTRONS" } = $entry->{ "BLOCK_COUNT" } - 1; $entry->{ "INTRONS" } = 0 if $entry->{ "INTRONS" } < 0; if ( $entry->{ "INTRONS" } ) { - for ( $i = 1; $i < $entry->{ "BLOCKCOUNT" }; $i++ ) + for ( $i = 1; $i < $entry->{ "BLOCK_COUNT" }; $i++ ) { - $intron_len = @begs[ $i ] - ( @begs[ $i - 1 ] + @lens[ $i - 1 ] ); + $intron_len = $begs[ $i ] - ( $begs[ $i - 1 ] + $lens[ $i - 1 ] ); $entry->{ "INTRON_LEN_" . ( $i - 1 ) } = $intron_len; @@ -513,7 +511,7 @@ sub bed_merge_entries if ( exists $entries->[ $i ]->{ "Q_BEGS" } ) { @q_begs = split ",", $entries->[ $i ]->{ "Q_BEGS" }; - @blocksizes = split ",", $entries->[ $i ]->{ "BLOCKSIZES" }; + @blocksizes = split ",", $entries->[ $i ]->{ "BLOCK_LENS" }; } else { @@ -530,21 +528,21 @@ sub bed_merge_entries map { $_ -= $entries->[ 0 ]->{ "CHR_BEG" } } @new_q_begs; %new_entry = ( - CHR => $entries->[ 0 ]->{ "CHR" }, - CHR_BEG => $entries->[ 0 ]->{ "CHR_BEG" }, - CHR_END => $entries->[ -1 ]->{ "CHR_END" }, - REC_TYPE => "BED", - BED_LEN => $entries->[ -1 ]->{ "CHR_END" } - $entries->[ 0 ]->{ "CHR_BEG" } + 1, - BED_COLS => 12, - Q_ID => join( ":", @q_ids ), - SCORE => 999, - STRAND => $entries->[ 0 ]->{ "STRAND" } || "+", - THICK_BEG => $entries->[ 0 ]->{ "THICK_BEG" } || $entries->[ 0 ]->{ "CHR_BEG" }, - THICK_END => $entries->[ -1 ]->{ "THICK_END" } || $entries->[ -1 ]->{ "CHR_END" }, - ITEMRGB => "0,0,0", - BLOCKCOUNT => scalar @new_q_begs, - BLOCKSIZES => join( ",", @new_blocksizes ), - Q_BEGS => join( ",", @new_q_begs ), + CHR => $entries->[ 0 ]->{ "CHR" }, + CHR_BEG => $entries->[ 0 ]->{ "CHR_BEG" }, + CHR_END => $entries->[ -1 ]->{ "CHR_END" }, + REC_TYPE => "BED", + BED_LEN => $entries->[ -1 ]->{ "CHR_END" } - $entries->[ 0 ]->{ "CHR_BEG" } + 1, + BED_COLS => 12, + Q_ID => join( ":", @q_ids ), + SCORE => 999, + STRAND => $entries->[ 0 ]->{ "STRAND" } || "+", + THICK_BEG => $entries->[ 0 ]->{ "THICK_BEG" } || $entries->[ 0 ]->{ "CHR_BEG" }, + THICK_END => $entries->[ -1 ]->{ "THICK_END" } || $entries->[ -1 ]->{ "CHR_END" }, + COLOR => 0, + BLOCK_COUNT => scalar @new_q_begs, + BLOCK_LENS => join( ",", @new_blocksizes ), + Q_BEGS => join( ",", @new_q_begs ), ); return wantarray ? %new_entry : \%new_entry; @@ -565,10 +563,10 @@ sub bed_split_entry my ( @q_begs, @blocksizes, $block, @blocks, $i ); - if ( exists $entry->{ "BLOCKCOUNT" } ) + if ( exists $entry->{ "BLOCK_COUNT" } ) { @q_begs = split ",", $entry->{ "Q_BEGS" }; - @blocksizes = split ",", $entry->{ "BLOCKSIZES" }; + @blocksizes = split ",", $entry->{ "BLOCK_LENS" }; for ( $i = 0; $i < @q_begs; $i++ ) { @@ -644,11 +642,11 @@ sub bed_upload_to_ucsc $args = join " ", $options->{ "database" }, $options->{ "table" }, "-tmpDir=$tmp_dir", $file; } - if ( $options->{ "sec_struct" } ) + if ( $options->{ "table" } =~ /rnaSecStr/ ) { $table = $options->{ "table" }; - Maasha::Common::error( "Attempt to load secondary structure track without 'rnaSecStr' in table name" ) if not $table =~ /rnaSecStr/; + print qq(uploading secondary structure table:"$table"\n) if $options->{ "verbose" }; $sql_file = "$tmp_dir/upload_RNA_SS.sql"; @@ -686,220 +684,127 @@ CREATE TABLE $table ( } -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PSL format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - - -sub psl_get_entry +sub psl_upload_to_ucsc { - # Martin A. Hansen, August 2008. + # Martin A. Hansen, September 2007. - # Reads PSL next entry from a PSL file and returns a record. + # Upload a PSL file to the UCSC database. - my ( $fh, # file handle of PSL filefull path to PSL file + my ( $file, # file to upload, + $options, # argument hashref + $append, # flag indicating table should be appended ) = @_; - # Returns hashref. - - my ( $line, @fields, %record ); - - while ( $line = <$fh> ) - { - chomp $line; + # Returns nothing. - @fields = split "\t", $line; + my ( $args ); - if ( scalar @fields == 21 ) - { - %record = ( - REC_TYPE => "PSL", - MATCHES => $fields[ 0 ], - MISMATCHES => $fields[ 1 ], - REPMATCHES => $fields[ 2 ], - NCOUNT => $fields[ 3 ], - QNUMINSERT => $fields[ 4 ], - QBASEINSERT => $fields[ 5 ], - SNUMINSERT => $fields[ 6 ], - SBASEINSERT => $fields[ 7 ], - STRAND => $fields[ 8 ], - Q_ID => $fields[ 9 ], - Q_LEN => $fields[ 10 ], - Q_BEG => $fields[ 11 ], - Q_END => $fields[ 12 ] - 1, - S_ID => $fields[ 13 ], - S_LEN => $fields[ 14 ], - S_BEG => $fields[ 15 ], - S_END => $fields[ 16 ] - 1, - BLOCKCOUNT => $fields[ 17 ], - BLOCKSIZES => $fields[ 18 ], - Q_BEGS => $fields[ 19 ], - S_BEGS => $fields[ 20 ], - ); - - $record{ "SCORE" } = $record{ "MATCHES" } + int( $record{ "REPMATCHES" } / 2 ) - $record{ "MISMATCHES" } - $record{ "QNUMINSERT" } - $record{ "SNUMINSERT" }; - $record{ "SPAN" } = $fields[ 12 ] - $fields[ 11 ]; - - return wantarray ? %record : \%record; - } + if ( $append ) { + $args = join " ", $options->{ "database" }, "-table=$options->{ 'table' }", "-clientLoad", "-append", $file; + } else { + $args = join " ", $options->{ "database" }, "-table=$options->{ 'table' }", "-clientLoad", $file; } - return undef; + Maasha::Common::run( "hgLoadPsl", "$args > /dev/null 2>&1" ); } -sub psl_get_entries -{ - # Martin A. Hansen, February 2008. - - # Reads PSL entries and returns a list of records. - - my ( $path, # full path to PSL file - ) = @_; - - # Returns hashref. - - my ( $fh, @lines, @fields, $i, %record, @records ); - - $fh = Maasha::Common::read_open( $path ); - - @lines = <$fh>; - - close $fh; - - chomp @lines; - - for ( $i = 5; $i < @lines; $i++ ) - { - @fields = split "\t", $lines[ $i ]; - - Maasha::Common::error( qq(Bad PSL format in file "$path") ) if not @fields == 21; - - undef %record; - - %record = ( - REC_TYPE => "PSL", - MATCHES => $fields[ 0 ], - MISMATCHES => $fields[ 1 ], - REPMATCHES => $fields[ 2 ], - NCOUNT => $fields[ 3 ], - QNUMINSERT => $fields[ 4 ], - QBASEINSERT => $fields[ 5 ], - SNUMINSERT => $fields[ 6 ], - SBASEINSERT => $fields[ 7 ], - STRAND => $fields[ 8 ], - Q_ID => $fields[ 9 ], - Q_LEN => $fields[ 10 ], - Q_BEG => $fields[ 11 ], - Q_END => $fields[ 12 ] - 1, - S_ID => $fields[ 13 ], - S_LEN => $fields[ 14 ], - S_BEG => $fields[ 15 ], - S_END => $fields[ 16 ] - 1, - BLOCKCOUNT => $fields[ 17 ], - BLOCKSIZES => $fields[ 18 ], - Q_BEGS => $fields[ 19 ], - S_BEGS => $fields[ 20 ], - ); - - $record{ "SCORE" } = $record{ "MATCHES" } + int( $record{ "REPMATCHES" } / 2 ) - $record{ "MISMATCHES" } - $record{ "QNUMINSERT" } - $record{ "SNUMINSERT" }; - - push @records, { %record }; - } - - return wantarray ? @records : \@records; -} +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> TRACK FILE <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -sub psl_put_header +sub ucsc_config_entry_get { - # Martin A. Hansen, September 2007. + # Martin A. Hansen, November 2008. - # Write a PSL header to file. + # Given a filehandle to a UCSC Genome Browser + # config file (.ra) get the next entry and + # return as a hash. Entries are separated by blank + # lines. # lines are skipped unless it is the lines: + # # Track added ... + # # Database ... - my ( $fh, # file handle - OPTIONAL + my ( $fh, # file hanlde ) = @_; - # Returns nothing. - - $fh = \*STDOUT if not $fh; + # Returns a hashref. - print $fh qq(psLayout version 3 -match mis- rep. N's Q gap Q gap T gap T gap strand Q Q Q Q T T T T block blockSizes qStart match match count bases count bases name size start end name size start end count ---------------------------------------------------------------------------------------------------------------------------------------------------------------- -); -} + my ( $line, %record ); + while ( $line = <$fh> ) + { + $line =~ tr/\n\r//d; + + if ( $line =~ /Track added by 'upload_to_ucsc' (\S+) (\S+)/) { + $record{ 'date' } = $1; + $record{ 'time' } = $2; + } elsif ( $line =~ /^# date: (.+)/ ) { + $record{ 'date' } = $1; + } elsif ( $line =~ /^# time: (.+)/ ) { + $record{ 'time' } = $1; + } elsif ( $line =~ /^# (?:database:|Database) (.+)/ ) { + $record{ 'database' } = $1; + } elsif ( $line =~ /^#/ ) { + # skip + } elsif ( $line =~ /(\S+)\s+(.+)/ ) { + $record{ $1 } = $2; + } -sub psl_put_entry -{ - # Martin A. Hansen, September 2007. - - # Write a PSL entry to file. - - my ( $record, # hashref - $fh, # file handle - OPTIONAL - ) = @_; + last if $line =~ /^$/ and exists $record{ "track" }; + } - # Returns nothing. + return undef if not exists $record{ "track" }; - $fh = \*STDOUT if not $fh; - - my @output; - - push @output, $record->{ "MATCHES" }; - push @output, $record->{ "MISMATCHES" }; - push @output, $record->{ "REPMATCHES" }; - push @output, $record->{ "NCOUNT" }; - push @output, $record->{ "QNUMINSERT" }; - push @output, $record->{ "QBASEINSERT" }; - push @output, $record->{ "SNUMINSERT" }; - push @output, $record->{ "SBASEINSERT" }; - push @output, $record->{ "STRAND" }; - push @output, $record->{ "Q_ID" }; - push @output, $record->{ "Q_LEN" }; - push @output, $record->{ "Q_BEG" }; - push @output, $record->{ "Q_END" } + 1; - push @output, $record->{ "S_ID" }; - push @output, $record->{ "S_LEN" }; - push @output, $record->{ "S_BEG" }; - push @output, $record->{ "S_END" } + 1; - push @output, $record->{ "BLOCKCOUNT" }; - push @output, $record->{ "BLOCKSIZES" }; - push @output, $record->{ "Q_BEGS" }; - push @output, $record->{ "S_BEGS" }; - - print $fh join( "\t", @output ), "\n"; + return wantarray ? %record : \%record; } -sub psl_upload_to_ucsc +sub ucsc_config_entry_put { - # Martin A. Hansen, September 2007. + # Martin A. Hansen, November 2008. - # Upload a PSL file to the UCSC database. + # Outputs a Biopiece record (a hashref) + # to a filehandle or STDOUT. - my ( $file, # file to upload, - $options, # argument hashref - $append, # flag indicating table should be appended + my ( $record, # hashref + $fh_out, # file handle ) = @_; # Returns nothing. - my ( $args ); - - if ( $append ) { - $args = join " ", $options->{ "database" }, "-table=$options->{ 'table' }", "-clientLoad", "-append", $file; - } else { - $args = join " ", $options->{ "database" }, "-table=$options->{ 'table' }", "-clientLoad", $file; - } - - Maasha::Common::run( "hgLoadPsl", "$args > /dev/null 2>&1" ); + my ( $date, $time ); + + $fh_out ||= \*STDOUT; + + ( $date, $time ) = split " ", Maasha::Common::time_stamp(); + + $record->{ "date" } ||= $date; + $record->{ "time" } ||= $time; + + print $fh_out "\n# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n"; + + map { print $fh_out "# $_: $record->{ $_ }\n" if exists $record->{ $_ } } qw( date time database ); + map { print $fh_out "$_ $record->{ $_ }\n" if exists $record->{ $_ } } qw( track + name + description + itemRgb + db + offset + url + htmlUrl + shortLabel + longLabel + group + priority + useScore + visibility + maxHeightPixels + color + mafTrack + type ); } -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> TRACK FILE <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - - -sub update_my_tracks +sub ucsc_update_config { # Martin A. Hansen, September 2007. @@ -911,70 +816,48 @@ sub update_my_tracks # Returns nothing. - my ( $file, $fh_in, $fh_out, $line, $time ); + my ( $file, $entry, %new_entry, $fh_in, $fh_out, $was_found ); $file = $ENV{ "HOME" } . "/ucsc/my_tracks.ra"; - # ---- create a backup ---- - - $fh_in = Maasha::Common::read_open( $file ); - $fh_out = Maasha::Common::write_open( "$file~" ); - - while ( $line = <$fh_in> ) { - print $fh_out $line; - } - - close $fh_in; - close $fh_out; - - # ---- append track ---- - - $time = Maasha::Common::time_stamp(); - - $fh_out = Maasha::Common::append_open( $file ); + Maasha::Filesys::file_copy( $file, "$file~" ); # create a backup - if ( $type eq "sec_struct" ) - { - print $fh_out "\n\n# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n"; - - print $fh_out "\n# Track added by 'upload_to_ucsc' $time\n\n"; + %new_entry = ( + database => $options->{ 'database' }, + track => $options->{ 'table' }, + shortLabel => $options->{ 'short_label' }, + longLabel => $options->{ 'long_label' }, + group => $options->{ 'group' }, + priority => $options->{ 'priority' }, + visibility => $options->{ 'visibility' }, + color => $options->{ 'color' }, + type => $type, + ); - print $fh_out "# Database $options->{ 'database' }\n\n"; + $new_entry{ 'useScore' } = 1 if $options->{ 'use_score' }; + $new_entry{ 'mafTrack' } = "multiz17way" if $type eq "type bed 6 +"; + $new_entry{ 'maxHeightPixels' } = "50:50:11" if $type eq "wig 0"; - print $fh_out "track $options->{ 'table' }\n"; - print $fh_out "shortLabel $options->{ 'short_label' }\n"; - print $fh_out "longLabel $options->{ 'long_label' }\n"; - print $fh_out "group $options->{ 'group' }\n"; - print $fh_out "priority $options->{ 'priority' }\n"; - print $fh_out "visibility $options->{ 'visibility' }\n"; - print $fh_out "color $options->{ 'color' }\n"; - print $fh_out "type bed 6 +\n"; - print $fh_out "mafTrack multiz17way\n"; + $fh_in = Maasha::Filesys::file_read_open( "$file~" ); + $fh_out = Maasha::Filesys::file_write_open( $file ); - print $fh_out "\n# //\n"; - } - else + while ( $entry = ucsc_config_entry_get( $fh_in ) ) { - print $fh_out "\n\n# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n"; - - print $fh_out "\n# Track added by 'upload_to_ucsc' $time\n\n"; - - print $fh_out "# Database $options->{ 'database' }\n\n"; - - print $fh_out "track $options->{ 'table' }\n"; - print $fh_out "shortLabel $options->{ 'short_label' }\n"; - print $fh_out "longLabel $options->{ 'long_label' }\n"; - print $fh_out "group $options->{ 'group' }\n"; - print $fh_out "priority $options->{ 'priority' }\n"; - print $fh_out "useScore 1\n" if $options->{ 'use_score' }; - print $fh_out "visibility $options->{ 'visibility' }\n"; - print $fh_out "maxHeightPixels 50:50:11\n" if $type eq "wig 0"; - print $fh_out "color $options->{ 'color' }\n"; - print $fh_out "type $type\n"; + if ( $entry->{ 'database' } eq $new_entry{ 'database' } and $entry->{ 'track' } eq $new_entry{ 'track' } ) + { + ucsc_config_entry_put( \%new_entry, $fh_out ); - print $fh_out "\n# //\n"; + $was_found = 1; + } + else + { + ucsc_config_entry_put( $entry, $fh_out ); + } } + ucsc_config_entry_put( \%new_entry, $fh_out ) if not $was_found; + + close $fh_in; close $fh_out; Maasha::Common::run( "ucscMakeTracks.pl", "-b > /dev/null 2>&1" ); @@ -1067,10 +950,10 @@ sub fixedstep_index_create foreach $chr ( keys %index ) { for ( $i = 0; $i < @{ $index{ $chr } } - 1; $i++ ) { - $index{ $chr }->[ $i ]->[ NEXT_CHR_BEG ] = $index{ $chr }->[ $i + 1 ]->[ 0 ]; + $index{ $chr }->[ $i ]->[ FS_NEXT_CHR_BEG ] = $index{ $chr }->[ $i + 1 ]->[ 0 ]; } - $index{ $chr }->[ -1 ]->[ NEXT_CHR_BEG ] = $index{ $chr }->[ -1 ]->[ CHR_END ] + 1; + $index{ $chr }->[ -1 ]->[ FS_NEXT_CHR_BEG ] = $index{ $chr }->[ -1 ]->[ FS_CHR_END ] + 1; } return wantarray ? %index : \%index; @@ -1137,7 +1020,7 @@ sub fixedstep_index_lookup $chr_beg -= $flank; $chr_end += $flank; -# print "chr_beg->$chr_beg chr_end->$chr_end flank->$flank\n"; + # print "chr_beg->$chr_beg chr_end->$chr_end flank->$flank\n"; if ( exists $index->{ $chr } ) { @@ -1164,14 +1047,14 @@ sub fixedstep_index_lookup if ( $index_beg == $index_end ) { - $beg = Maasha::Calc::max( $chr_beg, $index->{ $chr }->[ $index_beg ]->[ CHR_BEG ] ); - $end = Maasha::Calc::min( $chr_end, $index->{ $chr }->[ $index_end ]->[ CHR_END ] ); + $beg = Maasha::Calc::max( $chr_beg, $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] ); + $end = Maasha::Calc::min( $chr_end, $index->{ $chr }->[ $index_end ]->[ FS_CHR_END ] ); - if ( $beg <= $index->{ $chr }->[ $index_beg ]->[ CHR_END ] and $end >= $index->{ $chr }->[ $index_beg ]->[ CHR_BEG ] ) + if ( $beg <= $index->{ $chr }->[ $index_beg ]->[ FS_CHR_END ] and $end >= $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] ) { @vals = split "\n", Maasha::Common::file_read( $fh, - $index->{ $chr }->[ $index_beg ]->[ INDEX_BEG ] + 6 * ( $beg - $index->{ $chr }->[ $index_beg ]->[ CHR_BEG ] ), + $index->{ $chr }->[ $index_beg ]->[ FS_INDEX_BEG ] + 6 * ( $beg - $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] ), 6 * ( $end - $beg + 1 ), ); } @@ -1182,21 +1065,21 @@ sub fixedstep_index_lookup } else { - $beg = Maasha::Calc::max( $chr_beg, $index->{ $chr }->[ $index_beg ]->[ CHR_BEG ] ); + $beg = Maasha::Calc::max( $chr_beg, $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] ); # print Dumper( $beg, $index->{ $chr }->[ $index_beg ] ); -# print Dumper( "next", $index->{ $chr }->[ $index_beg ]->[ NEXT_CHR_BEG ] ); +# print Dumper( "next", $index->{ $chr }->[ $index_beg ]->[ FS_NEXT_CHR_BEG ] ); # beg next # v v # |||||||||....... - if ( $beg <= $index->{ $chr }->[ $index_beg ]->[ CHR_END ] ) + if ( $beg <= $index->{ $chr }->[ $index_beg ]->[ FS_CHR_END ] ) { @vals = split "\n", Maasha::Common::file_read( $fh, - $index->{ $chr }->[ $index_beg ]->[ INDEX_BEG ] + 6 * ( $beg - $index->{ $chr }->[ $index_beg ]->[ CHR_BEG ] ), - 6 * ( $index->{ $chr }->[ $index_beg ]->[ CHR_END ] - $beg + 1 ), + $index->{ $chr }->[ $index_beg ]->[ FS_INDEX_BEG ] + 6 * ( $beg - $index->{ $chr }->[ $index_beg ]->[ FS_CHR_BEG ] ), + 6 * ( $index->{ $chr }->[ $index_beg ]->[ FS_CHR_END ] - $beg + 1 ), ); for ( $c = 0; $c < @vals; $c++ ) { @@ -1204,18 +1087,18 @@ sub fixedstep_index_lookup } } - $end = Maasha::Calc::min( $chr_end, $index->{ $chr }->[ $index_end ]->[ CHR_END ] ); + $end = Maasha::Calc::min( $chr_end, $index->{ $chr }->[ $index_end ]->[ FS_CHR_END ] ); - if ( $end <= $index->{ $chr }->[ $index_end ]->[ CHR_END ] ) + if ( $end <= $index->{ $chr }->[ $index_end ]->[ FS_CHR_END ] ) { @vals = split "\n", Maasha::Common::file_read( $fh, - $index->{ $chr }->[ $index_end ]->[ INDEX_BEG ], - 6 * ( $end - $index->{ $chr }->[ $index_end ]->[ CHR_BEG ] + 1 ), + $index->{ $chr }->[ $index_end ]->[ FS_INDEX_BEG ], + 6 * ( $end - $index->{ $chr }->[ $index_end ]->[ FS_CHR_BEG ] + 1 ), ); for ( $c = 0; $c < @vals; $c++ ) { - $scores->[ $c + $index->{ $chr }->[ $index_end ]->[ CHR_BEG ] - $chr_beg ] = $vals[ $c ]; + $scores->[ $c + $index->{ $chr }->[ $index_end ]->[ FS_CHR_BEG ] - $chr_beg ] = $vals[ $c ]; } } @@ -1223,12 +1106,12 @@ sub fixedstep_index_lookup { @vals = split "\n", Maasha::Common::file_read( $fh, - $index->{ $chr }->[ $i ]->[ INDEX_BEG ], - 6 * ( $index->{ $chr }->[ $i ]->[ CHR_END ] - $index->{ $chr }->[ $i ]->[ CHR_BEG ] + 1 ), + $index->{ $chr }->[ $i ]->[ FS_INDEX_BEG ], + 6 * ( $index->{ $chr }->[ $i ]->[ FS_CHR_END ] - $index->{ $chr }->[ $i ]->[ FS_CHR_BEG ] + 1 ), ); for ( $c = 0; $c < @vals; $c++ ) { - $scores->[ $c + $index->{ $chr }->[ $i ]->[ CHR_BEG ] - $chr_beg ] = $vals[ $c ]; + $scores->[ $c + $index->{ $chr }->[ $i ]->[ FS_CHR_BEG ] - $chr_beg ] = $vals[ $c ]; } } } @@ -1272,6 +1155,11 @@ sub phastcons_parse_entry # Given a PhastCons entry converts this to a # list of super blocks. +# $options->{ "min" } ||= 10; +# $options->{ "dist" } ||= 25; +# $options->{ "threshold" } ||= 0.8; +# $options->{ "gap" } ||= 5; + my ( $lines, # list of lines $args, # argument hash ) = @_; @@ -1379,18 +1267,18 @@ sub phastcons_parse_entry $lens[ -1 ]++; push @entries, { - CHR => $super_block->[ 0 ]->{ "CHR" }, - CHR_BEG => $super_block->[ 0 ]->{ "CHR_BEG" }, - CHR_END => $super_block->[ -1 ]->{ "CHR_END" }, - Q_ID => "Q_ID", - SCORE => 100, - STRAND => "+", - THICK_BEG => $super_block->[ 0 ]->{ "CHR_BEG" }, - THICK_END => $super_block->[ -1 ]->{ "CHR_END" } + 1, - ITEMRGB => "0,200,100", - BLOCKCOUNT => scalar @{ $super_block }, - BLOCKSIZES => join( ",", @lens ), - Q_BEGS => join( ",", @begs ), + CHR => $super_block->[ 0 ]->{ "CHR" }, + CHR_BEG => $super_block->[ 0 ]->{ "CHR_BEG" }, + CHR_END => $super_block->[ -1 ]->{ "CHR_END" }, + Q_ID => "Q_ID", + SCORE => 100, + STRAND => "+", + THICK_BEG => $super_block->[ 0 ]->{ "CHR_BEG" }, + THICK_END => $super_block->[ -1 ]->{ "CHR_END" } + 1, + COLOR => 0, + BLOCK_COUNT => scalar @{ $super_block }, + BLOCK_LENS => join( ",", @lens ), + Q_BEGS => join( ",", @begs ), }; undef @begs; @@ -1407,7 +1295,7 @@ sub phastcons_normalize # Normalizes a list of lists with PhastCons scores, # in such a way that each list contains the same number - # or PhastCons scores. + # of PhastCons scores. my ( $AoA, # AoA with PhastCons scores ) = @_; @@ -1471,7 +1359,7 @@ sub phastcons_list_inflate # splice @{ $list }, $pos, 0, "X"; } - die qq(ERROR: bad inflate\n) if scalar @{ $list } != $len + $diff; + die qq(ERROR: Bad inflate\n) if scalar @{ $list } != $len + $diff; } @@ -1501,7 +1389,7 @@ sub phastcons_list_deflate splice @{ $list }, $pos, 1; } - die qq(ERROR: bad deflate\n) if scalar @{ $list } != $len - $diff; + die qq(ERROR: Dad deflate\n) if scalar @{ $list } != $len - $diff; } @@ -1694,9 +1582,13 @@ sub ucsc_get_user # Returns a string. - my ( $fh, $line, $user ); + my ( $file, $fh, $line, $user ); + + $file = "$ENV{ 'HOME' }/.hg.conf"; - $fh = Maasha::Common::read_open( "$ENV{ 'HOME' }/.hg.conf" ); + return if not -f $file; + + $fh = Maasha::Common::read_open( $file ); while ( $line = <$fh> ) { @@ -1725,9 +1617,13 @@ sub ucsc_get_password # Returns a string. - my ( $fh, $line, $password ); + my ( $file, $fh, $line, $password ); + + $file = "$ENV{ 'HOME' }/.hg.conf"; + + return if not -f $file; - $fh = Maasha::Common::read_open( "$ENV{ 'HOME' }/.hg.conf" ); + $fh = Maasha::Common::read_open( $file ); while ( $line = <$fh> ) {