From d389674734d15214ad3aebb05743e21788832bcb Mon Sep 17 00:00:00 2001 From: martinahansen Date: Tue, 11 Nov 2008 04:22:48 +0000 Subject: [PATCH] added mysql related Biopieces git-svn-id: http://biopieces.googlecode.com/svn/trunk@300 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/read_ucsc_config | 6 ++ bp_bin/remove_mysql_tables | 6 ++ bp_bin/write_ucsc_config | 6 ++ code_perl/Maasha/Biopieces.pm | 147 ++++++++++++++++++++++++++++++++++ code_perl/Maasha/UCSC.pm | 80 ++++++++++++++++++ 5 files changed, 245 insertions(+) create mode 100755 bp_bin/read_ucsc_config create mode 100755 bp_bin/remove_mysql_tables create mode 100755 bp_bin/write_ucsc_config diff --git a/bp_bin/read_ucsc_config b/bp_bin/read_ucsc_config new file mode 100755 index 0000000..4cd1d44 --- /dev/null +++ b/bp_bin/read_ucsc_config @@ -0,0 +1,6 @@ +#!/usr/bin/env perl + +use warnings; +use strict; + +use Maasha::Biopieces; diff --git a/bp_bin/remove_mysql_tables b/bp_bin/remove_mysql_tables new file mode 100755 index 0000000..4cd1d44 --- /dev/null +++ b/bp_bin/remove_mysql_tables @@ -0,0 +1,6 @@ +#!/usr/bin/env perl + +use warnings; +use strict; + +use Maasha::Biopieces; diff --git a/bp_bin/write_ucsc_config b/bp_bin/write_ucsc_config new file mode 100755 index 0000000..4cd1d44 --- /dev/null +++ b/bp_bin/write_ucsc_config @@ -0,0 +1,6 @@ +#!/usr/bin/env perl + +use warnings; +use strict; + +use Maasha::Biopieces; diff --git a/code_perl/Maasha/Biopieces.pm b/code_perl/Maasha/Biopieces.pm index c5c3c9d..0af3b64 100644 --- a/code_perl/Maasha/Biopieces.pm +++ b/code_perl/Maasha/Biopieces.pm @@ -185,6 +185,7 @@ sub run_script elsif ( $script eq "read_solexa" ) { script_read_solexa( $in, $out, $options ) } elsif ( $script eq "read_solid" ) { script_read_solid( $in, $out, $options ) } elsif ( $script eq "read_mysql" ) { script_read_mysql( $in, $out, $options ) } + elsif ( $script eq "read_ucsc_config" ) { script_read_ucsc_config( $in, $out, $options ) } elsif ( $script eq "assemble_tag_contigs" ) { script_assemble_tag_contigs( $in, $out, $options ) } elsif ( $script eq "format_genome" ) { script_format_genome( $in, $out, $options ) } elsif ( $script eq "length_seq" ) { script_length_seq( $in, $out, $options ) } @@ -230,9 +231,11 @@ sub run_script elsif ( $script eq "write_fixedstep" ) { script_write_fixedstep( $in, $out, $options ) } elsif ( $script eq "write_2bit" ) { script_write_2bit( $in, $out, $options ) } elsif ( $script eq "write_solid" ) { script_write_solid( $in, $out, $options ) } + elsif ( $script eq "write_ucsc_config" ) { script_write_ucsc_config( $in, $out, $options ) } elsif ( $script eq "head_records" ) { script_head_records( $in, $out, $options ) } elsif ( $script eq "remove_keys" ) { script_remove_keys( $in, $out, $options ) } elsif ( $script eq "remove_adaptor" ) { script_remove_adaptor( $in, $out, $options ) } + elsif ( $script eq "remove_mysql_tables" ) { script_remove_mysql_tables( $in, $out, $options ) } elsif ( $script eq "rename_keys" ) { script_rename_keys( $in, $out, $options ) } elsif ( $script eq "uniq_vals" ) { script_uniq_vals( $in, $out, $options ) } elsif ( $script eq "merge_vals" ) { script_merge_vals( $in, $out, $options ) } @@ -413,6 +416,13 @@ sub get_options password|p=s ); } + elsif ( $script eq "read_ucsc_config" ) + { + @options = qw( + data_in|i=s + num|n=s + ); + } elsif ( $script eq "format_genome" ) { @options = qw( @@ -701,6 +711,13 @@ sub get_options compress|Z ); } + elsif ( $script eq "write_ucsc_config" ) + { + @options = qw( + no_stream|x + data_out|o=s + ); + } elsif ( $script eq "plot_seqlogo" ) { @options = qw( @@ -752,6 +769,17 @@ sub get_options offset|o=s ); } + elsif ( $script eq "remove_mysql_tables" ) + { + @options = qw( + database|d=s + keys|k=s + query|q=s + user|u=s + password|p=s + no_stream|x + ); + } elsif ( $script eq "rename_keys" ) { @options = qw( @@ -2077,6 +2105,51 @@ sub script_read_mysql } +sub script_read_ucsc_config +{ + # Martin A. Hansen, November 2008. + + # Read track entries from UCSC Genome Browser '.ra' files. + + my ( $in, # handle to in stream + $out, # handle to out stream + $options, # options hash + ) = @_; + + # Returns nothing. + + my ( $record, $file, $data_in, $entry, $num ); + + while ( $record = get_record( $in ) ) { + put_record( $record, $out ); + } + + $num = 1; + + foreach $file ( @{ $options->{ "files" } } ) + { + $data_in = Maasha::Common::read_open( $file ); + + while ( $record = Maasha::UCSC::ucsc_config_get_entry( $data_in ) ) + { + $record->{ 'REC_TYPE' } = "UCSC Config"; + + put_record( $record, $out ); + + goto NUM if $options->{ "num" } and $num == $options->{ "num" }; + + $num++; + } + + close $data_in; + } + + NUM: + + close $data_in if $data_in; +} + + sub script_assemble_tag_contigs { # Martin A. Hansen, November 2008. @@ -4573,6 +4646,35 @@ sub script_write_solid } +sub script_write_ucsc_config +{ + # Martin A. Hansen, November 2008. + + # Write UCSC Genome Broser configuration (.ra file type) from + # records in the stream. + + my ( $in, # handle to in stream + $out, # handle to out stream + $options, # options hash + ) = @_; + + # Returns nothing. + + my ( $record, $fh ); + + $fh = write_stream( $options->{ "data_out" } ); + + while ( $record = get_record( $in ) ) + { + Maasha::UCSC::ucsc_config_put_entry( $record, $fh ) if $record->{ "REC_TYPE" } eq "UCSC Config"; + + put_record( $record, $out ) if not $options->{ "no_stream" }; + } + + close $fh; +} + + sub script_plot_seqlogo { # Martin A. Hansen, August 2007. @@ -4902,6 +5004,51 @@ sub script_remove_adaptor } +sub script_remove_mysql_tables +{ + # Martin A. Hansen, November 2008. + + # Remove MySQL tables from values in stream. + + my ( $in, # handle to in stream + $out, # handle to out stream + $options, # options hash + ) = @_; + + # Returns nothing. + + my ( $record, %table_hash, $dbh, $table ); + + $options->{ "user" } ||= Maasha::UCSC::ucsc_get_user(); + $options->{ "password" } ||= Maasha::UCSC::ucsc_get_password(); + + while ( $record = get_record( $in ) ) + { + map { $table_hash{ $record->{ $_ } } = 1 } @{ $options->{ 'keys' } }; + + put_record( $record, $out ) if not $options->{ 'no_stream' }; + } + + $dbh = Maasha::SQL::connect( $options->{ "database" }, $options->{ "user" }, $options->{ "password" } ); + + foreach $table ( sort keys %table_hash ) + { + if ( Maasha::SQL::table_exists( $dbh, $table ) ) + { + print STDERR qq(Removing table "$table" from database "$options->{ 'database' }" ... ) if $options->{ 'verbose' }; + Maasha::SQL::delete_table( $dbh, $table ); + print STDERR "done.\n" if $options->{ 'verbose' }; + } + else + { + print STDERR qq(WARNING: table "$table" not found in database "$options->{ 'database' }\n"); + } + } + + Maasha::SQL::disconnect( $dbh ); +} + + sub script_rename_keys { # Martin A. Hansen, August 2007. diff --git a/code_perl/Maasha/UCSC.pm b/code_perl/Maasha/UCSC.pm index 303b6b7..d9a9341 100644 --- a/code_perl/Maasha/UCSC.pm +++ b/code_perl/Maasha/UCSC.pm @@ -899,6 +899,86 @@ sub psl_upload_to_ucsc # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> TRACK FILE <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +sub ucsc_config_get_entry +{ + # Martin A. Hansen, November 2008. + + # Given a filehandle to a UCSC Genome Browser + # config file (.ra) get the next entry and + # return as a hash. Entries are separated by blank + # lines. # lines are skipped unless it is the lines: + # # Track added ... + # # Database ... + + my ( $fh, # file hanlde + ) = @_; + + # Returns a hashref. + + my ( $line, %record ); + + while ( $line = <$fh> ) + { + if ( $line =~ /^# date: (.+)/ ) { + $record{ 'date' } = $1; + } elsif ( $line =~ /^# time: (.+)/ ) { + $record{ 'time' } = $1; + } elsif ( $line =~ /^# database: (.+)/ ) { + $record{ 'database' } = $1; + } elsif ( $line =~ /^#/ ) { + # skip + } elsif ( $line =~ /(\S+)\s+(.+)/ ) { + $record{ $1 } = $2; + } + + last if $line =~ /^$/ and exists $record{ "track" }; + } + + return undef if not exists $record{ "track" }; + + return wantarray ? %record : \%record; +} + + +sub ucsc_config_put_entry +{ + # Martin A. Hansen, November 2008. + + # Outputs a Biopiece record (a hashref) + # to a filehandle or STDOUT. + + my ( $record, # hashref + $fh_out, # file handle + ) = @_; + + # Returns nothing. + + my ( $date, $time ); + + $fh_out ||= \*STDOUT; + + print $fh_out "\n# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n"; + + map { print $fh_out "# $record->{ $_ }\n" if exists $record->{ $_ } } qw( date time database ); + map { print $fh_out "$_ $record->{ $_ }\n" if exists $record->{ $_ } } qw( track + name + description + itemRgb + db + offset + url + htmlUrl + shortLabel + longLabel + group + priority + useScore + visibility + color + type ); +} + + sub update_my_tracks { # Martin A. Hansen, September 2007. -- 2.39.5