added mysql related Biopieces
authormartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Tue, 11 Nov 2008 04:22:48 +0000 (04:22 +0000)
committermartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Tue, 11 Nov 2008 04:22:48 +0000 (04:22 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@300 74ccb610-7750-0410-82ae-013aeee3265d

bp_bin/read_ucsc_config [new file with mode: 0755]
bp_bin/remove_mysql_tables [new file with mode: 0755]
bp_bin/write_ucsc_config [new file with mode: 0755]
code_perl/Maasha/Biopieces.pm
code_perl/Maasha/UCSC.pm

diff --git a/bp_bin/read_ucsc_config b/bp_bin/read_ucsc_config
new file mode 100755 (executable)
index 0000000..4cd1d44
--- /dev/null
@@ -0,0 +1,6 @@
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biopieces;
diff --git a/bp_bin/remove_mysql_tables b/bp_bin/remove_mysql_tables
new file mode 100755 (executable)
index 0000000..4cd1d44
--- /dev/null
@@ -0,0 +1,6 @@
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biopieces;
diff --git a/bp_bin/write_ucsc_config b/bp_bin/write_ucsc_config
new file mode 100755 (executable)
index 0000000..4cd1d44
--- /dev/null
@@ -0,0 +1,6 @@
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biopieces;
index c5c3c9d25061afb0b467835c09e924ff1ecb79dd..0af3b64efaf9fd207800350d6a9f2773ccd103a0 100644 (file)
@@ -185,6 +185,7 @@ sub run_script
     elsif ( $script eq "read_solexa" )              { script_read_solexa(               $in, $out, $options ) }
     elsif ( $script eq "read_solid" )               { script_read_solid(                $in, $out, $options ) }
     elsif ( $script eq "read_mysql" )               { script_read_mysql(                $in, $out, $options ) }
+    elsif ( $script eq "read_ucsc_config" )         { script_read_ucsc_config(          $in, $out, $options ) }
     elsif ( $script eq "assemble_tag_contigs" )     { script_assemble_tag_contigs(      $in, $out, $options ) }
     elsif ( $script eq "format_genome" )            { script_format_genome(             $in, $out, $options ) }
     elsif ( $script eq "length_seq" )               { script_length_seq(                $in, $out, $options ) }
@@ -230,9 +231,11 @@ sub run_script
     elsif ( $script eq "write_fixedstep" )          { script_write_fixedstep(           $in, $out, $options ) }
     elsif ( $script eq "write_2bit" )               { script_write_2bit(                $in, $out, $options ) }
     elsif ( $script eq "write_solid" )              { script_write_solid(               $in, $out, $options ) }
+    elsif ( $script eq "write_ucsc_config" )        { script_write_ucsc_config(         $in, $out, $options ) }
     elsif ( $script eq "head_records" )             { script_head_records(              $in, $out, $options ) }
     elsif ( $script eq "remove_keys" )              { script_remove_keys(               $in, $out, $options ) }
     elsif ( $script eq "remove_adaptor" )           { script_remove_adaptor(            $in, $out, $options ) }
+    elsif ( $script eq "remove_mysql_tables" )      { script_remove_mysql_tables(       $in, $out, $options ) }
     elsif ( $script eq "rename_keys" )              { script_rename_keys(               $in, $out, $options ) }
     elsif ( $script eq "uniq_vals" )                { script_uniq_vals(                 $in, $out, $options ) }
     elsif ( $script eq "merge_vals" )               { script_merge_vals(                $in, $out, $options ) }
@@ -413,6 +416,13 @@ sub get_options
             password|p=s
         );
     }
+    elsif ( $script eq "read_ucsc_config" )
+    {
+        @options = qw(
+            data_in|i=s
+            num|n=s
+        );
+    }
     elsif ( $script eq "format_genome" )
     {
         @options = qw(
@@ -701,6 +711,13 @@ sub get_options
             compress|Z
         );
     }
+    elsif ( $script eq "write_ucsc_config" )
+    {
+        @options = qw(
+            no_stream|x
+            data_out|o=s
+        );
+    }
     elsif ( $script eq "plot_seqlogo" )
     {
         @options = qw(
@@ -752,6 +769,17 @@ sub get_options
             offset|o=s
         );
     }
+    elsif ( $script eq "remove_mysql_tables" )
+    {
+        @options = qw(
+            database|d=s
+            keys|k=s
+            query|q=s
+            user|u=s
+            password|p=s
+            no_stream|x
+        );
+    }
     elsif ( $script eq "rename_keys" )
     {
         @options = qw(
@@ -2077,6 +2105,51 @@ sub script_read_mysql
 }
 
 
+sub script_read_ucsc_config
+{
+    # Martin A. Hansen, November 2008.
+
+    # Read track entries from UCSC Genome Browser '.ra' files.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $file, $data_in, $entry, $num );
+
+    while ( $record = get_record( $in ) ) {
+        put_record( $record, $out );
+    }
+
+    $num = 1;
+
+    foreach $file ( @{ $options->{ "files" } } )
+    {
+        $data_in = Maasha::Common::read_open( $file );
+
+        while ( $record = Maasha::UCSC::ucsc_config_get_entry( $data_in ) ) 
+        {
+            $record->{ 'REC_TYPE' } = "UCSC Config";
+
+            put_record( $record, $out );
+
+            goto NUM if $options->{ "num" } and $num == $options->{ "num" };
+
+            $num++;
+        }
+
+        close $data_in;
+    }
+
+    NUM:
+
+    close $data_in if $data_in;
+}
+
+
 sub script_assemble_tag_contigs
 {
     # Martin A. Hansen, November 2008.
@@ -4573,6 +4646,35 @@ sub script_write_solid
 }
 
 
+sub script_write_ucsc_config
+{
+    # Martin A. Hansen, November 2008.
+
+    # Write UCSC Genome Broser configuration (.ra file type) from
+    # records in the stream.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $fh );
+
+    $fh = write_stream( $options->{ "data_out" } );
+
+    while ( $record = get_record( $in ) ) 
+    {
+        Maasha::UCSC::ucsc_config_put_entry( $record, $fh ) if $record->{ "REC_TYPE" } eq "UCSC Config";
+
+        put_record( $record, $out ) if not $options->{ "no_stream" };
+    }
+
+    close $fh;
+}
+
+
 sub script_plot_seqlogo
 {
     # Martin A. Hansen, August 2007.
@@ -4902,6 +5004,51 @@ sub script_remove_adaptor
 }
 
 
+sub script_remove_mysql_tables
+{
+    # Martin A. Hansen, November 2008.
+
+    # Remove MySQL tables from values in stream.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, %table_hash, $dbh, $table );
+
+    $options->{ "user" }     ||= Maasha::UCSC::ucsc_get_user();
+    $options->{ "password" } ||= Maasha::UCSC::ucsc_get_password();
+
+    while ( $record = get_record( $in ) )
+    {
+        map { $table_hash{ $record->{ $_ } } = 1 } @{ $options->{ 'keys' } };
+
+        put_record( $record, $out ) if not $options->{ 'no_stream' };
+    }
+
+    $dbh = Maasha::SQL::connect( $options->{ "database" }, $options->{ "user" }, $options->{ "password" } );
+
+    foreach $table ( sort keys %table_hash )
+    {
+        if ( Maasha::SQL::table_exists( $dbh, $table ) )
+        {
+            print STDERR qq(Removing table "$table" from database "$options->{ 'database' }" ... ) if $options->{ 'verbose' };
+            Maasha::SQL::delete_table( $dbh, $table );
+            print STDERR "done.\n" if $options->{ 'verbose' };
+        }
+        else
+        {
+            print STDERR qq(WARNING: table "$table" not found in database "$options->{ 'database' }\n");
+        }
+    }
+
+    Maasha::SQL::disconnect( $dbh );
+}
+
+
 sub script_rename_keys
 {
     # Martin A. Hansen, August 2007.
index 303b6b7038f257e97040f312580718923efdfe02..d9a9341e7317ac1693c4d7181c4628372ec1ad76 100644 (file)
@@ -899,6 +899,86 @@ sub psl_upload_to_ucsc
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> TRACK FILE <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
 
+sub ucsc_config_get_entry
+{
+    # Martin A. Hansen, November 2008.
+
+    # Given a filehandle to a UCSC Genome Browser
+    # config file (.ra) get the next entry and
+    # return as a hash. Entries are separated by blank
+    # lines. # lines are skipped unless it is the lines:
+    # # Track added ...
+    # # Database ...
+
+    my ( $fh,   # file hanlde
+       ) = @_;
+
+    # Returns a hashref.
+
+    my ( $line, %record );
+
+    while ( $line = <$fh> )
+    {
+        if ( $line =~ /^# date: (.+)/ ) {
+            $record{ 'date' } = $1;
+        } elsif ( $line =~ /^# time: (.+)/ ) {
+            $record{ 'time' } = $1;
+        } elsif ( $line =~ /^# database: (.+)/ ) {
+            $record{ 'database' } = $1;
+        } elsif ( $line =~ /^#/ ) {
+            # skip
+        } elsif ( $line =~ /(\S+)\s+(.+)/ ) {
+            $record{ $1 } = $2;
+        }
+
+        last if $line =~ /^$/ and exists $record{ "track" };
+    }
+
+    return undef if not exists $record{ "track" };
+
+    return wantarray ? %record : \%record;
+}
+
+
+sub ucsc_config_put_entry
+{
+    # Martin A. Hansen, November 2008.
+
+    # Outputs a Biopiece record (a hashref)
+    # to a filehandle or STDOUT.
+
+    my ( $record,    # hashref
+         $fh_out,    # file handle
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $date, $time );
+
+    $fh_out ||= \*STDOUT;
+
+    print $fh_out "\n# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n";
+
+    map { print $fh_out "# $record->{ $_ }\n" if exists $record->{ $_ } } qw( date time database );
+    map { print $fh_out "$_ $record->{ $_ }\n" if exists $record->{ $_ } } qw( track
+                                                                               name
+                                                                               description
+                                                                               itemRgb
+                                                                               db
+                                                                               offset
+                                                                               url
+                                                                               htmlUrl
+                                                                               shortLabel
+                                                                               longLabel
+                                                                               group
+                                                                               priority
+                                                                               useScore
+                                                                               visibility
+                                                                               color
+                                                                               type );
+}
+
+
 sub update_my_tracks
 {
     # Martin A. Hansen, September 2007.