]> git.donarmstrong.com Git - biopieces.git/blobdiff - code_perl/Maasha/UCSC.pm
added format to compute
[biopieces.git] / code_perl / Maasha / UCSC.pm
index 84663db27285ed6585c146891f3f4f75055d2ddb..5c188fd1ffbaa800cf5dd42cb8238297b4c0728d 100644 (file)
@@ -28,6 +28,7 @@ package Maasha::UCSC;
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
 
+use warnings;
 use strict;
 use vars qw ( @ISA @EXPORT );
 
@@ -52,9 +53,9 @@ use constant {
     STRAND       => 5,
     THICK_BEG    => 6,
     THICK_END    => 7,
-    ITEMRGB      => 8,
-    BLOCKCOUNT   => 9,
-    BLOCKSIZES   => 10,
+    COLOR        => 8,
+    BLOCK_COUNT  => 9,
+    BLOCK_LENS   => 10,
     Q_BEGS       => 11,
 };
 
@@ -171,18 +172,18 @@ sub bed_get_entry
     elsif ( $columns == 12 )
     {
         %entry = (
-            "CHR"        => $fields[ 0 ],
-            "CHR_BEG"    => $fields[ 1 ],
-            "CHR_END"    => $fields[ 2 ] - 1,
-            "Q_ID"       => $fields[ 3 ],
-            "SCORE"      => $fields[ 4 ],
-            "STRAND"     => $fields[ 5 ],
-            "THICK_BEG"  => $fields[ 6 ],
-            "THICK_END"  => $fields[ 7 ] - 1,
-            "ITEMRGB"    => $fields[ 8 ],
-            "BLOCKCOUNT" => $fields[ 9 ],
-            "BLOCKSIZES" => $fields[ 10 ],
-            "Q_BEGS"     => $fields[ 11 ],
+            "CHR"         => $fields[ 0 ],
+            "CHR_BEG"     => $fields[ 1 ],
+            "CHR_END"     => $fields[ 2 ] - 1,
+            "Q_ID"        => $fields[ 3 ],
+            "SCORE"       => $fields[ 4 ],
+            "STRAND"      => $fields[ 5 ],
+            "THICK_BEG"   => $fields[ 6 ],
+            "THICK_END"   => $fields[ 7 ] - 1,
+            "COLOR"       => $fields[ 8 ],
+            "BLOCK_COUNT" => $fields[ 9 ],
+            "BLOCK_LENS"  => $fields[ 10 ],
+            "Q_BEGS"      => $fields[ 11 ],
         );
     }
     else
@@ -322,9 +323,9 @@ sub bed_put_entry
         push @fields, $record->{ "STRAND" };
         push @fields, $record->{ "THICK_BEG" }     if defined $record->{ "THICK_BEG" };
         push @fields, $record->{ "THICK_END" } + 1 if defined $record->{ "THICK_END" };
-        push @fields, $record->{ "ITEMRGB" }       if defined $record->{ "ITEMRGB" };
-        push @fields, $record->{ "BLOCKCOUNT" }    if defined $record->{ "BLOCKCOUNT" };
-        push @fields, $record->{ "BLOCKSIZES" }    if defined $record->{ "BLOCKSIZES" };
+        push @fields, $record->{ "COLOR" }         if defined $record->{ "COLOR" };
+        push @fields, $record->{ "BLOCK_COUNT" }   if defined $record->{ "BLOCK_COUNT" };
+        push @fields, $record->{ "BLOCK_LENS" }    if defined $record->{ "BLOCK_LENS" };
         push @fields, $record->{ "Q_BEGS" }        if defined $record->{ "Q_BEGS" };
     }
 
@@ -371,14 +372,14 @@ sub bed_analyze
     $intron_max = 0;
     $intron_min = 9999999999;
 
-    $entry->{ "EXONS" }   = $entry->{ "BLOCKCOUNT" };
+    $entry->{ "EXONS" }   = $entry->{ "BLOCK_COUNT" };
 
     @begs = split /,/, $entry->{ "Q_BEGS" };
-    @lens = split /,/, $entry->{ "BLOCKSIZES" };
+    @lens = split /,/, $entry->{ "BLOCK_LENS" };
 
-    for ( $i = 0; $i < $entry->{ "BLOCKCOUNT" }; $i++ )
+    for ( $i = 0; $i < $entry->{ "BLOCK_COUNT" }; $i++ )
     {
-        $exon_len = @lens[ $i ];
+        $exon_len = $lens[ $i ];
 
         $entry->{ "EXON_LEN_$i" } = $exon_len;
 
@@ -393,14 +394,14 @@ sub bed_analyze
     $entry->{ "EXON_MIN_LEN" }  = $exon_min;
     $entry->{ "EXON_MEAN_LEN" } = int( $exon_tot / $entry->{ "EXONS" } );
 
-    $entry->{ "INTRONS" } = $entry->{ "BLOCKCOUNT" } - 1;
+    $entry->{ "INTRONS" } = $entry->{ "BLOCK_COUNT" } - 1;
     $entry->{ "INTRONS" } = 0 if $entry->{ "INTRONS" } < 0;
 
     if ( $entry->{ "INTRONS" } )
     {
-        for ( $i = 1; $i < $entry->{ "BLOCKCOUNT" }; $i++ )
+        for ( $i = 1; $i < $entry->{ "BLOCK_COUNT" }; $i++ )
         {
-            $intron_len = @begs[ $i ] - ( @begs[ $i - 1 ] + @lens[ $i - 1 ] );
+            $intron_len = $begs[ $i ] - ( $begs[ $i - 1 ] + $lens[ $i - 1 ] );
 
             $entry->{ "INTRON_LEN_" . ( $i - 1 ) } = $intron_len;
 
@@ -510,7 +511,7 @@ sub bed_merge_entries
         if ( exists $entries->[ $i ]->{ "Q_BEGS" } )
         {
             @q_begs     = split ",", $entries->[ $i ]->{ "Q_BEGS" };
-            @blocksizes = split ",", $entries->[ $i ]->{ "BLOCKSIZES" };
+            @blocksizes = split ",", $entries->[ $i ]->{ "BLOCK_LENS" };
         }
         else
         {
@@ -527,21 +528,21 @@ sub bed_merge_entries
     map { $_ -= $entries->[ 0 ]->{ "CHR_BEG" } } @new_q_begs;
 
     %new_entry = (
-        CHR         => $entries->[ 0 ]->{ "CHR" },
-        CHR_BEG     => $entries->[ 0 ]->{ "CHR_BEG" },
-        CHR_END     => $entries->[ -1 ]->{ "CHR_END" },
-        REC_TYPE    => "BED",
-        BED_LEN     => $entries->[ -1 ]->{ "CHR_END" } - $entries->[ 0 ]->{ "CHR_BEG" } + 1,
-        BED_COLS    => 12,
-        Q_ID        => join( ":", @q_ids ),
-        SCORE       => 999,
-        STRAND      => $entries->[ 0 ]->{ "STRAND" }     || "+",
-        THICK_BEG   => $entries->[ 0 ]->{ "THICK_BEG" }  || $entries->[ 0 ]->{ "CHR_BEG" },
-        THICK_END   => $entries->[ -1 ]->{ "THICK_END" } || $entries->[ -1 ]->{ "CHR_END" },
-        ITEMRGB     => "0,0,0",
-        BLOCKCOUNT  => scalar @new_q_begs,
-        BLOCKSIZES  => join( ",", @new_blocksizes ),
-        Q_BEGS      => join( ",", @new_q_begs ),
+        CHR          => $entries->[ 0 ]->{ "CHR" },
+        CHR_BEG      => $entries->[ 0 ]->{ "CHR_BEG" },
+        CHR_END      => $entries->[ -1 ]->{ "CHR_END" },
+        REC_TYPE     => "BED",
+        BED_LEN      => $entries->[ -1 ]->{ "CHR_END" } - $entries->[ 0 ]->{ "CHR_BEG" } + 1,
+        BED_COLS     => 12,
+        Q_ID         => join( ":", @q_ids ),
+        SCORE        => 999,
+        STRAND       => $entries->[ 0 ]->{ "STRAND" }     || "+",
+        THICK_BEG    => $entries->[ 0 ]->{ "THICK_BEG" }  || $entries->[ 0 ]->{ "CHR_BEG" },
+        THICK_END    => $entries->[ -1 ]->{ "THICK_END" } || $entries->[ -1 ]->{ "CHR_END" },
+        COLOR        => 0,
+        BLOCK_COUNT  => scalar @new_q_begs,
+        BLOCK_LENS   => join( ",", @new_blocksizes ),
+        Q_BEGS       => join( ",", @new_q_begs ),
     );
 
     return wantarray ? %new_entry : \%new_entry;
@@ -562,10 +563,10 @@ sub bed_split_entry
 
     my ( @q_begs, @blocksizes, $block, @blocks, $i );
 
-    if ( exists $entry->{ "BLOCKCOUNT" } )
+    if ( exists $entry->{ "BLOCK_COUNT" } )
     {
         @q_begs     = split ",", $entry->{ "Q_BEGS" };
-        @blocksizes = split ",", $entry->{ "BLOCKSIZES" };
+        @blocksizes = split ",", $entry->{ "BLOCK_LENS" };
         
         for ( $i = 0; $i < @q_begs; $i++ )
         {
@@ -683,191 +684,6 @@ CREATE TABLE $table (
 }
 
 
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PSL format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-sub psl_get_entry
-{
-    # Martin A. Hansen, August 2008.
-
-    # Reads PSL next entry from a PSL file and returns a record.
-
-    my ( $fh,   # file handle of PSL filefull path to PSL file
-       ) = @_;
-
-    # Returns hashref.
-
-    my ( $line, @fields, %record );
-
-    while ( $line = <$fh> )
-    {
-        chomp $line;
-
-        @fields = split "\t", $line;
-
-        if ( scalar @fields == 21 )
-        {
-            %record = (
-                REC_TYPE    => "PSL",
-                MATCHES     => $fields[ 0 ],
-                MISMATCHES  => $fields[ 1 ],
-                REPMATCHES  => $fields[ 2 ],
-                NCOUNT      => $fields[ 3 ],
-                QNUMINSERT  => $fields[ 4 ],
-                QBASEINSERT => $fields[ 5 ],
-                SNUMINSERT  => $fields[ 6 ],
-                SBASEINSERT => $fields[ 7 ],
-                STRAND      => $fields[ 8 ],
-                Q_ID        => $fields[ 9 ],
-                Q_LEN       => $fields[ 10 ],
-                Q_BEG       => $fields[ 11 ],
-                Q_END       => $fields[ 12 ] - 1,
-                S_ID        => $fields[ 13 ],
-                S_LEN       => $fields[ 14 ],
-                S_BEG       => $fields[ 15 ],
-                S_END       => $fields[ 16 ] - 1,
-                BLOCKCOUNT  => $fields[ 17 ],
-                BLOCKSIZES  => $fields[ 18 ],
-                Q_BEGS      => $fields[ 19 ],
-                S_BEGS      => $fields[ 20 ],
-            );
-
-            $record{ "SCORE" } = $record{ "MATCHES" } + int( $record{ "REPMATCHES" } / 2 ) - $record{ "MISMATCHES" } - $record{ "QNUMINSERT" } - $record{ "SNUMINSERT" };
-            $record{ "SPAN" }  = $fields[ 12 ] - $fields[ 11 ];
-    
-            return wantarray ? %record : \%record;
-        }
-    }
-
-    return undef;
-}
-
-
-sub psl_get_entries
-{
-    # Martin A. Hansen, February 2008.
-
-    # Reads PSL entries and returns a list of records.
-
-    my ( $path,   # full path to PSL file
-       ) = @_;
-
-    # Returns hashref.
-
-    my ( $fh, @lines, @fields, $i, %record, @records );
-
-    $fh = Maasha::Common::read_open( $path );
-
-    @lines = <$fh>;
-
-    close $fh;
-
-    chomp @lines;
-
-    for ( $i = 5; $i < @lines; $i++ )
-    {
-        @fields = split "\t", $lines[ $i ];
-
-        Maasha::Common::error( qq(Bad PSL format in file "$path") ) if not @fields == 21;
-
-        undef %record;
-
-        %record = (
-            REC_TYPE    => "PSL",
-            MATCHES     => $fields[ 0 ],
-            MISMATCHES  => $fields[ 1 ],
-            REPMATCHES  => $fields[ 2 ],
-            NCOUNT      => $fields[ 3 ],
-            QNUMINSERT  => $fields[ 4 ],
-            QBASEINSERT => $fields[ 5 ],
-            SNUMINSERT  => $fields[ 6 ],
-            SBASEINSERT => $fields[ 7 ],
-            STRAND      => $fields[ 8 ],
-            Q_ID        => $fields[ 9 ],
-            Q_LEN       => $fields[ 10 ],
-            Q_BEG       => $fields[ 11 ],
-            Q_END       => $fields[ 12 ] - 1,
-            S_ID        => $fields[ 13 ],
-            S_LEN       => $fields[ 14 ],
-            S_BEG       => $fields[ 15 ],
-            S_END       => $fields[ 16 ] - 1,
-            BLOCKCOUNT  => $fields[ 17 ],
-            BLOCKSIZES  => $fields[ 18 ],
-            Q_BEGS      => $fields[ 19 ],
-            S_BEGS      => $fields[ 20 ],
-        );
-
-        $record{ "SCORE" } = $record{ "MATCHES" } + int( $record{ "REPMATCHES" } / 2 ) - $record{ "MISMATCHES" } - $record{ "QNUMINSERT" } - $record{ "SNUMINSERT" };
-    
-        push @records, { %record };
-    }
-
-    return wantarray ? @records : \@records;
-}
-
-
-sub psl_put_header
-{
-    # Martin A. Hansen, September 2007.
-
-    # Write a PSL header to file.
-
-    my ( $fh,  # file handle  - OPTIONAL
-       ) = @_;
-
-    # Returns nothing.
-
-    $fh = \*STDOUT if not $fh;
-
-    print $fh qq(psLayout version 3
-match   mis-    rep.    N's     Q gap   Q gap   T gap   T gap   strand  Q               Q       Q       Q       T               T       T       T       block   blockSizes      qStart        match   match           count   bases   count   bases           name            size    start   end     name            size    start   end     count
---------------------------------------------------------------------------------------------------------------------------------------------------------------- 
-);
-}
-
-
-sub psl_put_entry
-{
-    # Martin A. Hansen, September 2007.
-
-    # Write a PSL entry to file.
-
-    my ( $record,       # hashref
-         $fh,           # file handle  -  OPTIONAL
-       ) = @_;
-
-    # Returns nothing.
-
-    $fh = \*STDOUT if not $fh;
-
-    my @output;
-
-    push @output, $record->{ "MATCHES" };
-    push @output, $record->{ "MISMATCHES" };
-    push @output, $record->{ "REPMATCHES" };
-    push @output, $record->{ "NCOUNT" };
-    push @output, $record->{ "QNUMINSERT" };
-    push @output, $record->{ "QBASEINSERT" };
-    push @output, $record->{ "SNUMINSERT" };
-    push @output, $record->{ "SBASEINSERT" };
-    push @output, $record->{ "STRAND" };
-    push @output, $record->{ "Q_ID" };
-    push @output, $record->{ "Q_LEN" };
-    push @output, $record->{ "Q_BEG" };
-    push @output, $record->{ "Q_END" } + 1;
-    push @output, $record->{ "S_ID" };
-    push @output, $record->{ "S_LEN" };
-    push @output, $record->{ "S_BEG" };
-    push @output, $record->{ "S_END" } + 1;
-    push @output, $record->{ "BLOCKCOUNT" };
-    push @output, $record->{ "BLOCKSIZES" };
-    push @output, $record->{ "Q_BEGS" };
-    push @output, $record->{ "S_BEGS" };
-
-    print $fh join( "\t", @output ), "\n";
-}
-
-
 sub psl_upload_to_ucsc
 {
     # Martin A. Hansen, September 2007.
@@ -916,6 +732,8 @@ sub ucsc_config_entry_get
 
     while ( $line = <$fh> )
     {
+        $line =~ tr/\n\r//d;
+
         if ( $line =~ /Track added by 'upload_to_ucsc' (\S+) (\S+)/) {
              $record{ 'date' } = $1;
              $record{ 'time' } = $2;
@@ -1011,12 +829,12 @@ sub ucsc_update_config
         longLabel  => $options->{ 'long_label' },
         group      => $options->{ 'group' },
         priority   => $options->{ 'priority' },
-        useScore   => $options->{ 'use_score' },
         visibility => $options->{ 'visibility' },
         color      => $options->{ 'color' },
         type       => $type,
     );
 
+    $new_entry{ 'useScore' }        = 1             if $options->{ 'use_score' };
     $new_entry{ 'mafTrack' }        = "multiz17way" if $type eq "type bed 6 +";
     $new_entry{ 'maxHeightPixels' } = "50:50:11"    if $type eq "wig 0";
 
@@ -1337,6 +1155,11 @@ sub phastcons_parse_entry
     # Given a PhastCons entry converts this to a
     # list of super blocks.
 
+#    $options->{ "min" }       ||= 10;
+#    $options->{ "dist" }      ||= 25;
+#    $options->{ "threshold" } ||= 0.8;
+#    $options->{ "gap" }       ||= 5;
+
     my ( $lines,   # list of lines
          $args,    # argument hash
        ) = @_;
@@ -1444,18 +1267,18 @@ sub phastcons_parse_entry
         $lens[ -1 ]++;
 
         push @entries, {
-            CHR        => $super_block->[ 0 ]->{ "CHR" },
-            CHR_BEG    => $super_block->[ 0 ]->{ "CHR_BEG" },
-            CHR_END    => $super_block->[ -1 ]->{ "CHR_END" },
-            Q_ID       => "Q_ID",
-            SCORE      => 100,
-            STRAND     => "+",
-            THICK_BEG  => $super_block->[ 0 ]->{ "CHR_BEG" },
-            THICK_END  => $super_block->[ -1 ]->{ "CHR_END" } + 1,
-            ITEMRGB    => "0,200,100",
-            BLOCKCOUNT => scalar @{ $super_block },
-            BLOCKSIZES => join( ",", @lens ),
-            Q_BEGS     => join( ",", @begs ),
+            CHR         => $super_block->[ 0 ]->{ "CHR" },
+            CHR_BEG     => $super_block->[ 0 ]->{ "CHR_BEG" },
+            CHR_END     => $super_block->[ -1 ]->{ "CHR_END" },
+            Q_ID        => "Q_ID",
+            SCORE       => 100,
+            STRAND      => "+",
+            THICK_BEG   => $super_block->[ 0 ]->{ "CHR_BEG" },
+            THICK_END   => $super_block->[ -1 ]->{ "CHR_END" } + 1,
+            COLOR       => 0,
+            BLOCK_COUNT => scalar @{ $super_block },
+            BLOCK_LENS  => join( ",", @lens ),
+            Q_BEGS      => join( ",", @begs ),
         };
 
         undef @begs;
@@ -1472,7 +1295,7 @@ sub phastcons_normalize
 
     # Normalizes a list of lists with PhastCons scores,
     # in such a way that each list contains the same number
-    # or PhastCons scores.
+    # of PhastCons scores.
 
     my ( $AoA,    # AoA with PhastCons scores
        ) = @_;
@@ -1536,7 +1359,7 @@ sub phastcons_list_inflate
         # splice @{ $list }, $pos, 0, "X";
     }
 
-    die qq(ERROR: bad inflate\n) if scalar @{ $list } != $len + $diff;
+    die qq(ERROR: Bad inflate\n) if scalar @{ $list } != $len + $diff;
 }
 
 
@@ -1566,7 +1389,7 @@ sub phastcons_list_deflate
         splice @{ $list }, $pos, 1;
     }
 
-    die qq(ERROR: bad deflate\n) if scalar @{ $list } != $len - $diff;
+    die qq(ERROR: Dad deflate\n) if scalar @{ $list } != $len - $diff;
 }
 
 
@@ -1759,9 +1582,13 @@ sub ucsc_get_user
 
     # Returns a string.
 
-    my ( $fh, $line, $user );
+    my ( $file, $fh, $line, $user );
 
-    $fh = Maasha::Common::read_open( "$ENV{ 'HOME' }/.hg.conf" );
+    $file = "$ENV{ 'HOME' }/.hg.conf";
+
+    return if not -f $file;
+
+    $fh = Maasha::Common::read_open( $file );
 
     while ( $line = <$fh> )
     {
@@ -1790,9 +1617,13 @@ sub ucsc_get_password
 
     # Returns a string.
 
-    my ( $fh, $line, $password );
+    my ( $file, $fh, $line, $password );
+
+    $file = "$ENV{ 'HOME' }/.hg.conf";
+
+    return if not -f $file;
 
-    $fh = Maasha::Common::read_open( "$ENV{ 'HOME' }/.hg.conf" );
+    $fh = Maasha::Common::read_open( $file );
 
     while ( $line = <$fh> )
     {