git.donarmstrong.com Git - biopieces.git/blobdiff - code_perl/Maasha/Biopieces.pm
fixed rename bug
[biopieces.git] / code_perl / Maasha / Biopieces.pm
index b50b3fca53a695a297dee5a892b1b8b03d66af58..6c38e430698324319052305b5c1c3afcf5fd84dc 100644 (file)
@@ -110,14 +110,8 @@ close $log_local;
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> RUN SCRIPT <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
 
-my $t0 = gettimeofday();
-
 run_script( $script );
 
-my $t1 = gettimeofday();
-
-print STDERR "Program: $script" . ( " " x ( 25 - length( $script ) ) ) . sprintf( "Run time: %.4f\n", ( $t1 - $t0 ) );
-
 
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
@@ -158,7 +152,9 @@ sub run_script
 
     # Returns nothing.
 
-    my ( $options, $in, $out );
+    my ( $t0, $t1, $options, $in, $out );
+
+    $t0 = gettimeofday();
 
     $options = get_options( $script );
 
@@ -267,6 +263,10 @@ sub run_script
 
     close $in if defined $in;
     close $out;
+
+    $t1 = gettimeofday();
+
+    print STDERR "Program: $script" . ( " " x ( 25 - length( $script ) ) ) . sprintf( "Run time: %.4f\n", ( $t1 - $t0 ) ) if $options->{ 'verbose' };
 }
 
 
@@ -747,7 +747,7 @@ sub get_options
         @options = qw(
             adaptor|a=s
             mismatches|m=s
-            no_remove|n
+            remove|r=s
             offset|o=s
         );
     }
@@ -947,9 +947,6 @@ sub get_options
             priority|p=f
             use_score|u
             visibility|v=s
-            wiggle|w
-            score|S
-            log10|L
             color|c=s
             chunk_size|C=s
         );
@@ -1051,6 +1048,10 @@ sub get_options
         {
             Maasha::Common::error( qq(Argument to --$opt must be octal or decimal - not "$options{ $opt }") );
         }
+        elsif ( $opt eq "remove" and $script eq "remove_adaptor" and $options{ $opt } !~ /before|after|skip/ )
+        {
+            Maasha::Common::error( qq(Argument to --$opt must be before, after, or skip - not "$options{ $opt }") );
+        }
     }
 
     Maasha::Common::error( qq(no --database specified) )                if $script eq "create_blast_db"     and not $options{ "database" };
@@ -1425,7 +1426,7 @@ sub script_read_fixedstep
 {
     # Martin A. Hansen, Juli 2008.
 
-    # Read fixedStep wiggle format from stream or file.
+    # Read fixedstep wiggle format from stream or file.
 
     my ( $in,        # handle to in stream
          $out,       # handle to out stream
@@ -1456,7 +1457,7 @@ sub script_read_fixedstep
                 $record->{ "CHR" }      = $1;
                 $record->{ "CHR_BEG" }  = $2;
                 $record->{ "STEP" }     = $3;
-                $record->{ "VALS" }     = join ",", @{ $entry };
+                $record->{ "VALS" }     = join ";", @{ $entry };
             }
 
             put_record( $record, $out );
@@ -2556,10 +2557,11 @@ sub script_calc_fixedstep
                 {
                     map { $_ = sprintf( "%.4f", Maasha::Calc::log10( $_ ) ) } @{ $block } if $options->{ "log10" };
 
-                    $record->{ "CHR" }     = $chr;
-                    $record->{ "CHR_BEG" } = $beg_block;
-                    $record->{ "STEP" }    = 1;
-                    $record->{ "VALS" }    = join ";", @{ $block };
+                    $record->{ "CHR" }      = $chr;
+                    $record->{ "CHR_BEG" }  = $beg_block;
+                    $record->{ "STEP" }     = 1;
+                    $record->{ "VALS" }     = join ";", @{ $block };
+                    $record->{ "REC_TYPE" } = "fixed_step";
 
                     put_record( $record, $out );
 
@@ -2590,10 +2592,11 @@ sub script_calc_fixedstep
 
         map { $_ = sprintf( "%.4f", Maasha::Calc::log10( $_ ) ) } @{ $block } if $options->{ "log10" };
 
-        $record->{ "CHR" }     = $chr;
-        $record->{ "CHR_BEG" } = $beg_block;
-        $record->{ "STEP" }    = 1;
-        $record->{ "VALS" }    = join ";", @{ $block };
+        $record->{ "CHR" }      = $chr;
+        $record->{ "CHR_BEG" }  = $beg_block;
+        $record->{ "STEP" }     = 1;
+        $record->{ "VALS" }     = join ";", @{ $block };
+        $record->{ "REC_TYPE" } = "fixed_step";
 
         put_record( $record, $out );
 
@@ -3102,7 +3105,7 @@ sub script_get_genome_phastcons
     $phastcons_file  = Maasha::Config::genome_phastcons( $options->{ "genome" } );
     $phastcons_index = Maasha::Config::genome_phastcons_index( $options->{ "genome" } );
 
-    $index           = Maasha::UCSC::phastcons_index_retrieve( $phastcons_index );
+    $index           = Maasha::UCSC::fixedstep_index_retrieve( $phastcons_index );
     $fh_phastcons    = Maasha::Common::read_open( $phastcons_file );
 
     if ( defined $options->{ "chr" } and defined $options->{ "beg" } and ( defined $options->{ "end" } or defined $options->{ "len" } ) )
@@ -3114,7 +3117,7 @@ sub script_get_genome_phastcons
             $options->{ "end" } = $options->{ "beg" } + $options->{ "len" } - 1;
         }
 
-        $scores = Maasha::UCSC::phastcons_index_lookup( $index, $fh_phastcons, $options->{ "chr" }, $options->{ "beg" }, $options->{ "end" }, $options->{ "flank" } );
+        $scores = Maasha::UCSC::fixedstep_index_lookup( $index, $fh_phastcons, $options->{ "chr" }, $options->{ "beg" }, $options->{ "end" }, $options->{ "flank" } );
 
         $record->{ "CHR" }       = $options->{ "chr" };
         $record->{ "CHR_BEG" }   = $options->{ "beg" } - $options->{ "flank" };
@@ -4331,7 +4334,7 @@ sub script_write_fixedstep
 
             $vals = $record->{ 'VALS' };
 
-            $vals =~ tr/,/\n/;
+            $vals =~ tr/;/\n/;
 
             print $fh "$vals\n";
         }
@@ -4697,30 +4700,43 @@ sub script_remove_adaptor
 
     my ( $record, $adaptor, $seq, $adaptor_len, $seq_len, $offset, $max_match, $max_mismatch, $pos );
 
+    $options->{ "remove" } ||= "after";
+
     $max_mismatch = $options->{ "mismatches" } || 0;
-    $offset       = $options->{ "offset" }     || 15;
-    $adaptor      = $options->{ "adaptor" };
-    $adaptor_len  = length $adaptor;
-    $adaptor      = [ split //, uc $adaptor ];
+    $offset       = $options->{ "offset" };
 
-    $max_match    = $adaptor_len - $max_mismatch;
+    if ( not defined $offset ) {
+        $offset = 0;
+    } else {
+        $offset--;
+    }
+
+    $adaptor      = uc $options->{ "adaptor" };
+    $adaptor_len  = length $adaptor;
 
     while ( $record = get_record( $in ) ) 
     {
         if ( $record->{ "SEQ" } )
         {
-            $seq     = $record->{ "SEQ" };
+            $seq     = uc $record->{ "SEQ" };
             $seq_len = length $seq;
-            $seq     = [ split //, uc $seq ];
 
-            $pos = Maasha::Seq::find_adaptor( $adaptor, $seq, $adaptor_len, $seq_len, $offset, $max_match, $max_mismatch );
+            $pos = Maasha::Common::index_m( $seq, $adaptor, $seq_len, $adaptor_len, $offset, $max_mismatch );
 
             $record->{ "ADAPTOR_POS" } = $pos;
 
-            if ( $pos >= 0 and not $options->{ "no_remove" } )
+            if ( $pos >= 0 and $options->{ "remove" } ne "skip" )
             {
-                $record->{ "SEQ" }     = substr $record->{ "SEQ" }, 0, $pos;
-                $record->{ "SEQ_LEN" } = $pos;
+                if ( $options->{ "remove" } eq "after" )
+                {
+                    $record->{ "SEQ" }     = substr $record->{ "SEQ" }, 0, $pos;
+                    $record->{ "SEQ_LEN" } = $pos;
+                }
+                else
+                {
+                    $record->{ "SEQ" }     = substr $record->{ "SEQ" }, $pos + $adaptor_len;
+                    $record->{ "SEQ_LEN" } = length $record->{ "SEQ" };
+                }
             }
 
             put_record( $record, $out );
@@ -5581,7 +5597,7 @@ sub script_plot_histogram
 
     while ( $record = get_record( $in ) ) 
     {
-        $data_hash{ $record->{ $options->{ "key" } } }++ if $record->{ $options->{ "key" } };
+        $data_hash{ $record->{ $options->{ "key" } } }++ if defined $record->{ $options->{ "key" } };
 
         put_record( $record, $out ) if not $options->{ "no_stream" };
     }
@@ -5621,7 +5637,7 @@ sub script_plot_lendist
 
     while ( $record = get_record( $in ) ) 
     {
-        $data_hash{ $record->{ $options->{ "key" } } }++ if $record->{ $options->{ "key" } };
+        $data_hash{ $record->{ $options->{ "key" } } }++ if defined $record->{ $options->{ "key" } };
 
         put_record( $record, $out ) if not $options->{ "no_stream" };
     }
@@ -6037,8 +6053,7 @@ sub script_upload_to_ucsc
 
     # Returns nothing.
 
-    my ( $record, $file, $wib_file, $wig_file, $wib_dir, $fh_in, $fh_out, $i, $first, $format, $args, $type, $columns, $append, %fh_hash,
-         $chr, $beg, $end, $block, $line, $max, $beg_block, $entry, $q_id, $clones, $vals );
+    my ( $record, $file, $wib_file, $wig_file, $wib_dir, $fh_out, $i, $first, $format, $type, $columns, $append, $vals );
 
     $options->{ "short_label" } ||= $options->{ 'table' };
     $options->{ "long_label" }  ||= $options->{ 'table' };
@@ -6056,195 +6071,109 @@ sub script_upload_to_ucsc
 
     $i = 0;
 
-    if ( $options->{ 'wiggle' } )
+    $fh_out = Maasha::Common::write_open( $file );
+
+    while ( $record = get_record( $in ) ) 
     {
-        while ( $record = get_record( $in ) )
-        {
-            put_record( $record, $out ) if not $options->{ "no_stream" };
+        put_record( $record, $out ) if not $options->{ "no_stream" };
 
-            $record->{ "CHR" }     = $record->{ "S_ID" }  if not defined $record->{ "CHR" };
-            $record->{ "CHR_BEG" } = $record->{ "S_BEG" } if not defined $record->{ "CHR_BEG" };
-            $record->{ "CHR_END" } = $record->{ "S_END" } if not defined $record->{ "CHR_END" };
+        if ( $record->{ "REC_TYPE" } eq "fixed_step" )
+        {
+            $vals = $record->{ "VALS" };
+            $vals =~ tr/;/\n/;
 
-            $fh_hash{ $record->{ "CHR" } } = Maasha::Common::write_open( "$BP_TMP/$record->{ 'CHR' }" ) if not exists $fh_hash{ $record->{ "CHR" } };
+            print $fh_out "fixedStep chrom=$record->{ 'CHR' } start=$record->{ 'CHR_BEG' } step=$record->{ 'STEP' }\n";
+            print $fh_out "$vals\n";
 
-            $fh_out = $fh_hash{ $record->{ "CHR" } };
-            
-            Maasha::UCSC::bed_put_entry( $record, $fh_out, 5 );
+            $format = "WIGGLE" if not $format;
         }
-
-        map { close $_ } keys %fh_hash;
-
-        $fh_out = Maasha::Common::write_open( $file );
-
-        foreach $chr ( sort keys %fh_hash )
+        elsif ( $record->{ "REC_TYPE" } eq "PSL" )
         {
-            Maasha::Common::run( "bedSort", "$BP_TMP/$chr $BP_TMP/$chr" );
-
-            $fh_in = Maasha::Common::read_open( "$BP_TMP/$chr" );
-
-            undef $block;
-
-            while ( $entry = Maasha::UCSC::bed_get_entry( $fh_in, 5 ) )
-            {
-                $chr  = $entry->{ 'CHR' };
-                $beg  = $entry->{ 'CHR_BEG' };
-                $end  = $entry->{ 'CHR_END' };
-                $q_id = $entry->{ 'Q_ID' };
-                
-                if ( $options->{ "score" } ) {
-                    $clones = $entry->{ 'SCORE' };
-                } elsif ( $q_id =~ /_(\d+)$/ ) {
-                    $clones = $1;
-                } else {
-                    $clones = 1;
-                }
-
-                if ( $block )
-                {
-                    if ( $beg > $max )
-                    {
-                        Maasha::UCSC::fixedstep_put_entry( $chr, $beg_block, $block, $fh_out, $options->{ "log10" } );
-                        undef $block;
-                    }
-                    else
-                    {
-                        for ( $i = $beg - $beg_block; $i < ( $beg - $beg_block ) + ( $end - $beg ); $i++ ) {
-                            $block->[ $i ] += $clones;
-                        }
-
-                        $max = Maasha::Calc::max( $max, $end );
-                    }
-                }
-
-                if ( not $block )
-                {
-                    $beg_block = $beg;
-                    $max       = $end;
-
-                    for ( $i = 0; $i < ( $end - $beg ); $i++ ) {
-                        $block->[ $i ] += $clones;
-                    }
-                }
-            }
-
-            close $fh_in;
-
-            Maasha::UCSC::fixedstep_put_entry( $chr, $beg_block, $block, $fh_out, $options->{ "log10" } );
+            Maasha::UCSC::psl_put_header( $fh_out ) if $first;
+            Maasha::UCSC::psl_put_entry( $record, $fh_out );
+            
+            $first = 0;
 
-            unlink "$BP_TMP/$chr";
+            $format = "PSL" if not $format;
         }
-
-        close $fh_out;
-
-        $format = "WIGGLE";
-    }
-    else
-    {
-        $fh_out = Maasha::Common::write_open( $file );
-    
-        while ( $record = get_record( $in ) ) 
+        elsif ( $record->{ "REC_TYPE" } eq "BED" and $record->{ "SEC_STRUCT" } )
         {
-            put_record( $record, $out ) if not $options->{ "no_stream" };
+            # chrom chromStart  chromEnd    name    score   strand  size    secStr  conf 
 
-            if ( $record->{ "REC_TYPE" } eq "fixed_step" )
-            {
-                $vals = $record->{ "VALS" };
-                $vals =~ tr/,/\n/;
+            print $fh_out join ( "\t",
+                $record->{ "CHR" },
+                $record->{ "CHR_BEG" },
+                $record->{ "CHR_END" } + 1,
+                $record->{ "Q_ID" },
+                $record->{ "SCORE" },
+                $record->{ "STRAND" },
+                $record->{ "SIZE" },
+                $record->{ "SEC_STRUCT" },
+                $record->{ "CONF" },
+            ), "\n";
 
-                print $fh_out "fixedStep chrom=$record->{ 'CHR' } start=$record->{ 'CHR_BEG' } step=$record->{ 'STEP' }\n";
-                print $fh_out "$vals\n";
+            $format  = "BED_SS" if not $format;
+        }
+        elsif ( $record->{ "REC_TYPE" } eq "BED" )
+        {
+            Maasha::UCSC::bed_put_entry( $record, $fh_out, $record->{ "BED_COLS" } );
 
-                $format = "WIGGLE" if not $format;
-            }
-            elsif ( $record->{ "REC_TYPE" } eq "PSL" )
-            {
-                Maasha::UCSC::psl_put_header( $fh_out ) if $first;
-                Maasha::UCSC::psl_put_entry( $record, $fh_out );
-                
-                $first = 0;
+            $format  = "BED"                   if not $format;
+            $columns = $record->{ "BED_COLS" } if not $columns;
+        }
+        elsif ( $record->{ "REC_TYPE" } eq "PATSCAN" and $record->{ "CHR" } )
+        {
+            Maasha::UCSC::bed_put_entry( $record, $fh_out, 6 );
 
-                $format = "PSL" if not $format;
-            }
-            elsif ( $record->{ "REC_TYPE" } eq "BED" and $record->{ "SEC_STRUCT" } )
-            {
-                # chrom chromStart  chromEnd    name    score   strand  size    secStr  conf 
-
-                print $fh_out join ( "\t",
-                    $record->{ "CHR" },
-                    $record->{ "CHR_BEG" },
-                    $record->{ "CHR_END" } + 1,
-                    $record->{ "Q_ID" },
-                    $record->{ "SCORE" },
-                    $record->{ "STRAND" },
-                    $record->{ "SIZE" },
-                    $record->{ "SEC_STRUCT" },
-                    $record->{ "CONF" },
-                ), "\n";
-
-                $format  = "BED_SS" if not $format;
-            }
-            elsif ( $record->{ "REC_TYPE" } eq "BED" )
-            {
-                Maasha::UCSC::bed_put_entry( $record, $fh_out, $record->{ "BED_COLS" } );
+            $format  = "BED" if not $format;
+            $columns = 6     if not $columns;
+        }
+        elsif ( $record->{ "REC_TYPE" } eq "BLAST" and $record->{ "S_ID" } =~ /^chr/ )
+        {
+            $record->{ "CHR" }     = $record->{ "S_ID" };
+            $record->{ "CHR_BEG" } = $record->{ "S_BEG" };
+            $record->{ "CHR_END" } = $record->{ "S_END" };
+            $record->{ "SCORE" }   = $record->{ "BIT_SCORE" } * 1000;
 
-                $format  = "BED"                   if not $format;
-                $columns = $record->{ "BED_COLS" } if not $columns;
-            }
-            elsif ( $record->{ "REC_TYPE" } eq "PATSCAN" and $record->{ "CHR" } )
-            {
-                Maasha::UCSC::bed_put_entry( $record, $fh_out, 6 );
+            $format  = "BED" if not $format;
+            $columns = 6     if not $columns;
 
-                $format  = "BED" if not $format;
-                $columns = 6     if not $columns;
-            }
-            elsif ( $record->{ "REC_TYPE" } eq "BLAST" and $record->{ "S_ID" } =~ /^chr/ )
-            {
-                $record->{ "CHR" }     = $record->{ "S_ID" };
-                $record->{ "CHR_BEG" } = $record->{ "S_BEG" };
-                $record->{ "CHR_END" } = $record->{ "S_END" };
-                $record->{ "SCORE" }   = $record->{ "BIT_SCORE" } * 1000;
+            Maasha::UCSC::bed_put_entry( $record, $fh_out );
+        }
+        elsif ( $record->{ "REC_TYPE" } eq "VMATCH" and $record->{ "S_ID" } =~ /^chr/i )
+        {
+            $record->{ "CHR" }     = $record->{ "S_ID" };
+            $record->{ "CHR_BEG" } = $record->{ "S_BEG" };
+            $record->{ "CHR_END" } = $record->{ "S_END" };
+            $record->{ "SCORE" }   = $record->{ "SCORE" } || 999;
+            $record->{ "SCORE" }   = int( $record->{ "SCORE" } );
 
-                $format  = "BED" if not $format;
-                $columns = 6     if not $columns;
+            $format  = "BED" if not $format;
+            $columns = 6     if not $columns;
 
-                Maasha::UCSC::bed_put_entry( $record, $fh_out );
-            }
-            elsif ( $record->{ "REC_TYPE" } eq "VMATCH" and $record->{ "S_ID" } =~ /^chr/i )
-            {
-                $record->{ "CHR" }     = $record->{ "S_ID" };
-                $record->{ "CHR_BEG" } = $record->{ "S_BEG" };
-                $record->{ "CHR_END" } = $record->{ "S_END" };
-                $record->{ "SCORE" }   = $record->{ "SCORE" } || 999;
-                $record->{ "SCORE" }   = int( $record->{ "SCORE" } );
+            Maasha::UCSC::bed_put_entry( $record, $fh_out, 6 );
+        }
 
-                $format  = "BED" if not $format;
-                $columns = 6     if not $columns;
+        if ( $i == $options->{ "chunk_size" } )
+        {
+            close $fh_out;
 
-                Maasha::UCSC::bed_put_entry( $record, $fh_out, 6 );
+            if ( $format eq "BED" ) {
+                Maasha::UCSC::bed_upload_to_ucsc( $BP_TMP, $file, $options, $append );
+            } elsif ( $format eq "PSL" ) {
+                Maasha::UCSC::psl_upload_to_ucsc( $file, $options, $append ); 
             }
 
-            if ( $i == $options->{ "chunk_size" } )
-            {
-                close $fh_out;
-
-                if ( $format eq "BED" ) {
-                    Maasha::UCSC::bed_upload_to_ucsc( $BP_TMP, $file, $options, $append );
-                } elsif ( $format eq "PSL" ) {
-                    Maasha::UCSC::psl_upload_to_ucsc( $file, $options, $append ); 
-                }
-
-                unlink $file;
-
-                $first = 1;
+            unlink $file;
 
-                $append = 1;
+            $first = 1;
 
-                $fh_out = Maasha::Common::write_open( $file );
-            }
+            $append = 1;
 
-            $i++;
+            $fh_out = Maasha::Common::write_open( $file );
         }
+
+        $i++;
     }
 
     close $fh_out;
@@ -6523,7 +6452,7 @@ sub clean_tmp
 {
     # Martin A. Hansen, July 2008.
 
-    # Cleans out any unused temporary files and direcotries in BP_TMP.
+    # Cleans out any unused temporary files and directories in BP_TMP.
 
     # Returns nothing.
 
@@ -6553,7 +6482,7 @@ sub clean_tmp
                 elsif ( $pid == $curr_pid )
                 {
                     # print STDERR "Removing current dir: $dir\n";
-                    Maasha::Common::dir_remove( $dir );
+                    Maasha::Common::dir_remove( $dir );
                 }
             }
         }