From 462d75f91bfd1f942c9dbeca963a03c64d0ada39 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Tue, 24 Nov 2009 13:11:00 +0000 Subject: [PATCH] rewrote upload_to_KISS git-svn-id: http://biopieces.googlecode.com/svn/trunk@767 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/upload_to_KISS | 163 ++++++++++++------------------------------ 1 file changed, 44 insertions(+), 119 deletions(-) diff --git a/bp_bin/upload_to_KISS b/bp_bin/upload_to_KISS index 6ca989d..9e9f039 100755 --- a/bp_bin/upload_to_KISS +++ b/bp_bin/upload_to_KISS @@ -21,7 +21,7 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -# Write Biopiece records to a KISS MySQL database. +# Write Biopiece records to the KISS Genome browser. # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @@ -29,7 +29,6 @@ use warnings; use strict; use Maasha::Common; -use Maasha::SQL; use Maasha::KISS::IO; use Maasha::Biopieces; use Maasha::Filesys; @@ -38,59 +37,42 @@ use Maasha::Filesys; # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -my ( $options, $user, $password, $dbh, $in, $out, $tmp_dir, $tmp_file, $fh_out, $record, $entry ); +my ( $data_dir, $user, $options, $path, $in, $out, $tmp_dir, %fh_hash, $fh_out, $record, $entry, $key, @dirs, $dir, $dst_dir, @nums, $num ); -$user = Maasha::Biopieces::biopiecesrc( "MYSQL_USER" ); -$password = Maasha::Biopieces::biopiecesrc( "MYSQL_PASSWORD" ); +$data_dir = Maasha::Biopieces::biopiecesrc( "KISS_DATA_DIR" ); +$user = Maasha::Biopieces::biopiecesrc( "KISS_USER" ); $options = Maasha::Biopieces::parse_options( [ - { long => 'user', short => 'u', type => 'string', mandatory => 'no', default => $user, allowed => undef, disallowed => undef }, - { long => 'password', short => 'p', type => 'string', mandatory => 'no', default => $password, allowed => undef, disallowed => undef }, - { long => 'database', short => 'd', type => 'string', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef }, - { long => 'table', short => 't', type => 'string', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef }, - { long => 'table_replace', short => 'T', type => 'flag', mandatory => 'no' , default => undef, allowed => undef, disallowed => undef }, - { long => 'table_append', short => 'A', type => 'flag', mandatory => 'no' , default => undef, allowed => undef, disallowed => undef }, - { long => 'no_stream', short => 'x', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'user', short => 'u', type => 'string', mandatory => 'no', default => $user, allowed => undef, disallowed => undef }, + { long => 'clade', short => 'c', type => 'string', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef }, + { long => 'genome', short => 'g', type => 'string', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef }, + { long => 'assembly', short => 'a', type => 'string', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef }, + { long => 'track_name', short => 't', type => 'string', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef }, + { long => 'no_stream', short => 'x', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, ] ); -$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); -$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); - -$tmp_dir = Maasha::Biopieces::get_tmpdir(); -$tmp_file = "$tmp_dir/upload_to_KISS.kiss"; +$options->{ 'track_name' } =~ tr/ /_/; -$dbh = Maasha::SQL::connect( $options->{ 'database' }, $options->{ 'user' }, $options->{ 'password' } ); +$path = join "/", $data_dir, "Users/", $options->{ 'user' }, $options->{ 'clade' }, $options->{ 'genome' }, $options->{ 'assembly' }; -if ( Maasha::SQL::table_exists( $dbh, $options->{ 'table' } ) ) -{ - if ( not $options->{ 'table_replace' } and not $options->{ 'table_append' } ) { - Maasha::Common::error( qq(Table "$options->{ 'table' }" exists. Use --table_replace or --table_append) ) - } +Maasha::Common::error( qq(Path not found: "$path") ) if not -d $path; - if ( $options->{ 'table_replace' } ) { - Maasha::SQL::delete_table( $dbh, $options->{ 'table' } ) if Maasha::SQL::table_exists( $dbh, $options->{ 'table' } ); - } - - if ( $options->{ 'sql' } ) { - Maasha::SQL::request( $dbh, $options->{ 'sql' } ); - } elsif ( not $options->{ 'table_append' } ) { - create_table( $dbh, $options->{ 'table' }, $options->{ 'verbose' } ); - } -} -else -{ - create_table( $dbh, $options->{ 'table' }, $options->{ 'verbose' } ); -} +$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); +$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); -$fh_out = Maasha::Filesys::file_write_open( $tmp_file ); +$tmp_dir = Maasha::Biopieces::get_tmpdir(); while ( $record = Maasha::Biopieces::get_record( $in ) ) { if ( $entry = Maasha::KISS::IO::biopiece2kiss( $record ) ) { - map { $entry->{ $_ } = '\N' if $entry->{ $_ } eq '.' } keys %{ $entry }; + if ( not exists $fh_hash{ $entry->{ 'S_ID' } } ) { + $fh_hash{ $entry->{ 'S_ID' } } = Maasha::Filesys::file_write_open( "$tmp_dir/$entry->{ 'S_ID' }" ); + } + + $fh_out = $fh_hash{ $entry->{ 'S_ID' } }; Maasha::KISS::IO::kiss_entry_put( $entry, $fh_out ); } @@ -98,85 +80,44 @@ while ( $record = Maasha::Biopieces::get_record( $in ) ) Maasha::Biopieces::put_record( $record, $out ) if not $options->{ "no_stream" }; } -close $fh_out; - -bulk_load_file( $dbh, $tmp_file, $options->{ 'table' }, $options->{ 'verbose' } ); - -Maasha::SQL::disconnect( $dbh ); - -unlink $tmp_file; - -Maasha::Biopieces::close_stream( $in ); -Maasha::Biopieces::close_stream( $out ); - - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - - -sub bulk_load_file +foreach $key ( keys %fh_hash ) { - # Martin A. Hansen, October 2009 - # - # Bulk load a tab separated file to MySQL while - # converting . to NULL. - - my ( $dbh, # Database handle - $file, # File to load - $table, # Table name - $verbose, # Verbose flag - ) = @_; - - # Returns nothing. - - my ( $sql ); - - $sql = qq(LOAD DATA LOCAL INFILE "$file" INTO TABLE $table); + close $fh_hash{ $key }; - print STDERR "$sql\n" if $verbose; - - Maasha::SQL::request( $dbh, $sql ); -} + $dst_dir = Maasha::Filesys::dir_create_if_not_exists( "$path/$key" ); + $dst_dir = Maasha::Filesys::dir_create_if_not_exists( "$dst_dir/Tracks" ); + @dirs = Maasha::Filesys::ls_dirs( $dst_dir ); -sub create_table -{ - # Martin A. Hansen, July 2009 + foreach $dir ( @dirs ) + { + $dir = ( split "/", $dir )[ -1 ]; - # Create a new MySQL table. + if ( $dir =~ /^(\d+)/ ) { + push @nums, $1; + } + } - my ( $dbh, # Database handle - $table, # Table name - $verbose, # Verbose switch - ) = @_; + @nums = sort { $a <=> $b } @nums; - # Returns nothing. + $num = $nums[ -1 ] || 0; - my ( @fields, $field_str, $sql ); + $num += 10; - @fields = ( - "S_ID VARCHAR(256)", - "S_BEG INT, INDEX S_BEG_index (S_BEG)", - "S_END INT, INDEX S_END_index (S_END)", - "Q_ID VARCHAR(256)", - "SCORE FLOAT", - "STRAND CHAR(1)", - "HITS INT", - "ALIGN VARCHAR(256)", - "BLOCK_COUNT TINYINT", - "BLOCK_BEGS VARCHAR(1024)", - "BLOCK_LENS VARCHAR(1024)", - "BLOCK_TYPE VARCHAR(1024)", - ); + $dst_dir = "$dst_dir/$num" . "_$options->{ 'track_name' }"; - $field_str = join( ", ", @fields ); + Maasha::Filesys::dir_create( $dst_dir ); - $sql = "CREATE TABLE $table ($field_str)"; + Maasha::Filesys::file_copy( "$tmp_dir/$key", "$dst_dir/track_data.kiss" ); - print STDERR "$sql\n" if $verbose; + Maasha::KISS::IO::kiss_index( "$dst_dir/track_data.kiss" ); - Maasha::SQL::request( $dbh, $sql ); + unlink "$tmp_dir/$key"; } +Maasha::Biopieces::close_stream( $in ); +Maasha::Biopieces::close_stream( $out ); + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @@ -189,7 +130,6 @@ BEGIN END { - Maasha::SQL::disconnect( $dbh ) if $dbh; Maasha::Biopieces::status_log(); } @@ -198,18 +138,3 @@ END __END__ - - @fields = ( - "S_ID VARCHAR(256), INDEX S_ID_index (S_ID)", - "S_BEG INT, INDEX S_BEG_index (S_BEG)", - "S_END INT, INDEX S_END_index (S_END)", - "Q_ID VARCHAR(256), INDEX Q_ID_index (Q_ID)", - "SCORE FLOAT, INDEX SCORE_index (SCORE)", - "STRAND CHAR(1), INDEX STRAND_index (STRAND)", - "HITS INT, INDEX HITS_index (HITS)", - "ALIGN VARCHAR(256)", - "BLOCK_COUNT TINYINT, INDEX BLOCK_COUNT_index (BLOCK_COUNT)", - "BLOCK_BEGS VARCHAR(1024)", - "BLOCK_LENS VARCHAR(1024)", - "BLOCK_TYPE VARCHAR(1024)", - ); -- 2.39.5