3 # Copyright (C) 2007-2009 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
24 # Write Biopiece records to a MySQL database.
26 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
32 use Maasha::Biopieces;
37 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
40 my ( $options, $user, $password, $dbh, $in, $out, $tmp_dir, $tmp_file, $fh_out, $record, $key, $type_hash, $i, @keys, $line );
42 $user = $ENV{ "USER" };
43 $password = $ENV{ "USER" };
45 $options = Maasha::Biopieces::parse_options(
47 { long => 'user', short => 'u', type => 'string', mandatory => 'no', default => $user, allowed => undef, disallowed => undef },
48 { long => 'password', short => 'p', type => 'string', mandatory => 'no', default => $password, allowed => undef, disallowed => undef },
49 { long => 'database', short => 'd', type => 'string', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef },
50 { long => 'table', short => 't', type => 'string', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef },
51 { long => 'index', short => 'i', type => 'list', mandatory => 'no' , default => undef, allowed => undef, disallowed => undef },
52 { long => 'database_replace', short => 'D', type => 'flag', mandatory => 'no' , default => undef, allowed => undef, disallowed => undef },
53 { long => 'table_replace', short => 'T', type => 'flat', mandatory => 'no' , default => undef, allowed => undef, disallowed => undef },
54 { long => 'table_append', short => 'A', type => 'flat', mandatory => 'no' , default => undef, allowed => undef, disallowed => undef },
55 { long => 'guess_type', short => 'g', type => 'uint', mandatory => 'no', default => 1000, allowed => undef, disallowed => 0 },
56 { long => 'no_stream', short => 'x', type => 'flag', mandatory => 'no', default => $password, allowed => undef, disallowed => undef },
60 $in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
61 $out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
63 $options->{ 'guess_type' } = 0 if $options->{ 'table_append' }; # no point in analyzing data types if appending.
65 $tmp_dir = Maasha::Biopieces::get_tmpdir();
66 $tmp_file = "$tmp_dir/mysql.tab";
68 $fh_out = Maasha::Filesys::file_write_open( $tmp_file );
74 while ( $record = Maasha::Biopieces::get_record( $in ) )
77 @keys = sort keys %{ $record } if not @keys;
79 guess_types( $type_hash, $record ) if $i < $options->{ 'guess_type' };
81 map { $line .= "$record->{ $_ }\t" } @keys;
84 print $fh_out "$line\n";
86 Maasha::Biopieces::put_record( $record, $out ) if not $options->{ "no_stream" };
93 if ( Maasha::SQL::database_exists( $options->{ 'database' }, $options->{ 'user' }, $options->{ 'password' } ) )
95 if ( $options->{ 'database_replace' } ) {
96 Maasha::SQL::delete_database( $options->{ 'database' }, $options->{ 'user' }, $options->{ 'password' } );
98 Maasha::Common::error( qq(Database "$options->{ 'database' }" already exists - use --database_replace to overwrite.) );
102 Maasha::SQL::create_database( $options->{ 'database' }, $options->{ 'user' }, $options->{ 'password' } );
104 $dbh = Maasha::SQL::connect( $options->{ 'database' }, $options->{ 'user' }, $options->{ 'password' } );
106 if ( $options->{ 'table_replace' } ) {
107 Maasha::SQL::delete_table( $dbh, $options->{ 'table' } ) if Maasha::SQL::table_exists( $dbh, $options->{ 'table' } );
110 if ( not $options->{ 'table_append' } ) {
111 create_table( $dbh, $options->{ 'table' }, $type_hash, $options->{ 'index' }, $options->{ 'verbose' } );
114 Maasha::SQL::bulk_load_file( $dbh, $tmp_file, $options->{ 'table' }, "\t" ) if Maasha::SQL::table_exists( $dbh, $options->{ 'table' } );
116 $dbh = Maasha::SQL::disconnect( $dbh );
120 Maasha::Biopieces::close_stream( $in );
121 Maasha::Biopieces::close_stream( $out );
124 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
129 # Martin A. Hansen, July 2009.
131 # Guess the MySQL table types from Biopiece records.
133 my ( $type_hash, # Hash with type info
134 $record, # Biopiece record
141 foreach $key ( keys %{ $record } )
143 if ( exists $type_hash->{ $key } )
145 if ( Maasha::Calc::is_a_number( $record->{ $key } ) )
147 if ( $record->{ $key } =~ tr/.// ) {
148 $type_hash->{ $key }->{ "TYPE" } = "FLOAT";
150 $type_hash->{ $key }->{ "TYPE" } = "INTEGER";
153 $type_hash->{ $key }->{ "MIN" } = Maasha::Calc::min( $record->{ $key }, $type_hash->{ $key }->{ "MIN" } );
154 $type_hash->{ $key }->{ "MAX" } = Maasha::Calc::max( $record->{ $key }, $type_hash->{ $key }->{ "MAX" } );
158 $type_hash->{ $key }->{ "TYPE" } = "ALPH";
159 $type_hash->{ $key }->{ "MIN" } = Maasha::Calc::min( length $record->{ $key }, $type_hash->{ $key }->{ "MIN" } );
160 $type_hash->{ $key }->{ "MAX" } = Maasha::Calc::max( length $record->{ $key }, $type_hash->{ $key }->{ "MAX" } );
165 if ( Maasha::Calc::is_a_number( $record->{ $key } ) )
167 $type_hash->{ $key }->{ "TYPE" } = "NUMBER";
168 $type_hash->{ $key }->{ "MIN" } = $record->{ $key };
169 $type_hash->{ $key }->{ "MAX" } = $record->{ $key };
173 $type_hash->{ $key }->{ "TYPE" } = "ALPH";
174 $type_hash->{ $key }->{ "MIN" } = length $record->{ $key };
175 $type_hash->{ $key }->{ "MAX" } = length $record->{ $key };
184 # Martin A. Hansen, July 2009
186 # Create a new MySQL table.
188 my ( $dbh, # Database handle
190 $type_hash, # Hashref with type info.
191 $fields_index, # Fields to index
192 $verbose, # Verbose switch
197 my ( %index_hash, $key, $field, @fields, $fields, $diff, $sql );
199 map { $index_hash{ $_ } = 1 } @{ $fields_index };
201 foreach $key ( sort keys %{ $type_hash } )
205 if ( $type_hash->{ $key }->{ "TYPE" } eq "INTEGER" )
209 if ( $type_hash->{ $key }->{ "MIN" } >= 0 ) {
210 # $field .= "UNSIGNED ";
213 $diff = $type_hash->{ $key }->{ "MAX" } - $type_hash->{ $key }->{ "MIN" };
215 if ( $diff <= 255 ) {
217 } elsif ( $diff <= 65535 ) {
218 $field .= "SMALLINT";
219 } elsif ( $diff <= 16777215 ) {
220 $field .= "MEDIUMINT";
221 } elsif ( $diff <= 4294967295 ) {
227 elsif ( $type_hash->{ $key }->{ "TYPE" } eq "FLOAT" )
229 $field .= "$key FLOAT"; # FLOAT(p) where p=precision from 0 to 23 results in a four-byte single-precision FLOAT column.
230 # A precision from 24 to 53 results in an eight-byte double-precision coulumn.
234 if ( $type_hash->{ $key }->{ "MIN" } == $type_hash->{ $key }->{ "MAX" } ) {
235 $field = "$key CHAR($type_hash->{ $key }->{ 'MAX' })";
237 $field = "$key VARCHAR($type_hash->{ $key }->{ 'MAX' })";
241 $field .= ", INDEX $key" . "_index ($key)" if exists $index_hash{ $key };
243 push @fields, $field;
246 $fields = join( ", ", @fields );
248 $sql = "CREATE TABLE $table ($fields)"; #. " MAX_ROWS = 100000000";
250 print STDERR "$sql\n" if $verbose;
252 Maasha::SQL::request( $dbh, $sql );
256 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
261 Maasha::Biopieces::status_set();
267 Maasha::Biopieces::status_log();
271 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<