3 # Copyright (C) 2007-2009 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
24 # Upload data to local UCSC Genome Browser Database.
26 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
31 use Maasha::Biopieces;
34 use Maasha::UCSC::Wiggle;
35 use Maasha::UCSC::PSL;
36 use Maasha::UCSC::BED;
39 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
# Script-level lexical variables used throughout the upload script.
# NOTE(review): the visible lines never assign $i, $first, $format or $append;
# the lines doing so appear to be missing from this view - confirm against the full file.
42 my ( $options, $in, $out, $record, $tmp_dir, $file, $wib_file, $wig_file, $wib_dir, $fh_out, $i, $first, $format, $type, $columns, $append, $entry );
# Command line options. 'database' and 'table' are mandatory; 'group' defaults to
# $ENV{LOGNAME}, 'priority' to 1 and 'visibility' to 'pack' (restricted to
# hide/dense/squish/pack/full). The remaining options default to undef.
# NOTE(review): the closing of this call is not visible in this view.
44 $options = Maasha::Biopieces::parse_options(
46 { long => 'database', short => 'd', type => 'string', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef },
47 { long => 'table', short => 't', type => 'string', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef },
48 { long => 'no_stream', short => 'x', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
49 { long => 'short_label', short => 's', type => 'string', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
50 { long => 'long_label', short => 'l', type => 'string', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
51 { long => 'group', short => 'g', type => 'string', mandatory => 'no', default => $ENV{ 'LOGNAME' }, allowed => undef, disallowed => undef },
52 { long => 'priority', short => 'p', type => 'float', mandatory => 'no', default => 1, allowed => undef, disallowed => undef },
53 { long => 'use_score', short => 'u', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
54 { long => 'visibility', short => 'V', type => 'string', mandatory => 'no', default => 'pack', allowed => 'hide,dense,squish,pack,full', disallowed => undef },
55 { long => 'color', short => 'c', type => 'string', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
56 { long => 'check', short => 'C', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
# Open the record streams named by the 'stream_in' / 'stream_out' option keys.
60 $in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
61 $out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
# Temporary directory that holds the intermediate upload file.
63 $tmp_dir = Maasha::Biopieces::get_tmpdir();
# Both labels fall back to the table name when not given (or given as a false value).
65 $options->{ "short_label" } ||= $options->{ 'table' };
66 $options->{ "long_label" } ||= $options->{ 'table' };
# Without an explicit color, pick a random "R,G,B" triple; each component is
# int( rand( 255 ) ), i.e. an integer in the range 0..254.
67 $options->{ "color" } ||= join( ",", int( rand( 255 ) ), int( rand( 255 ) ), int( rand( 255 ) ) );
# NOTE(review): 'chunk_size' is not among the options declared in the visible
# parse_options list, so this default presumably always applies - confirm.
68 $options->{ "chunk_size" } ||= 10_000_000_000; # Due to 32-bit UCSC compilation really large tables cannot be loaded in one go.
# Intermediate file that records are converted into before being uploaded.
70 $file = "$tmp_dir/ucsc_upload.tmp";
75 $fh_out = Maasha::Filesys::file_write_open( $file );
# Main loop: read each record from the input stream, optionally pass it on to the
# output stream, and write a format-specific entry to the temporary upload file
# depending on the record's REC_TYPE.
# NOTE(review): braces and several statements (e.g. assignments to $format, $i,
# $first, $append, and $columns for the non-BED branches) are not visible in this
# view - confirm against the full file before changing anything here.
77 while ( $record = Maasha::Biopieces::get_record( $in ) )
# Records are forwarded downstream unless the 'no_stream' flag is set.
79 Maasha::Biopieces::put_record( $record, $out ) if not $options->{ "no_stream" };
# fixed_step records: converted with biopiece2fixedstep and written as a fixedstep entry
# (skipped when the conversion returns a false value).
81 if ( $record->{ "REC_TYPE" } eq "fixed_step" )
85 if ( $entry = Maasha::UCSC::Wiggle::biopiece2fixedstep( $record ) ) {
86 Maasha::UCSC::Wiggle::fixedstep_entry_put( $entry, $fh_out );
# PSL records: the PSL header is written only while $first is true, then the entry.
89 elsif ( $record->{ "REC_TYPE" } eq "PSL" )
93 Maasha::UCSC::PSL::psl_put_header( $fh_out ) if $first;
94 Maasha::UCSC::PSL::psl_put_entry( $record, $fh_out );
# BED records that carry a SEC_STRUCT key are written directly as a tab-separated line.
# NOTE(review): the column comment below lists nine fields but only five list
# elements are visible here, and the closing of the join is not visible either.
98 elsif ( $record->{ "REC_TYPE" } eq "BED" and $record->{ "SEC_STRUCT" } )
100 # chrom chromStart chromEnd name score strand size secStr conf
104 print $fh_out join ( "\t",
106 $record->{ "CHR_BEG" },
# CHR_END is incremented by one on output - presumably converting an inclusive end
# coordinate to an exclusive one; TODO confirm.
107 $record->{ "CHR_END" } + 1,
109 $record->{ "SCORE" },
110 $record->{ "STRAND" },
112 $record->{ "SEC_STRUCT" },
# Plain BED records: the column count is taken from the record's BED_COLS key.
116 elsif ( $record->{ "REC_TYPE" } eq "BED" )
119 $columns = $record->{ "BED_COLS" };
121 if ( $entry = Maasha::UCSC::BED::biopiece2bed( $record, $columns ) ) {
122 Maasha::UCSC::BED::bed_entry_put( $entry, $fh_out, $columns, $options->{ 'check' } );
# PATSCAN records are only handled when they have a true CHR value; written as BED.
125 elsif ( $record->{ "REC_TYPE" } eq "PATSCAN" and $record->{ "CHR" } )
130 if ( $entry = Maasha::UCSC::BED::biopiece2bed( $record, $columns ) ) {
131 Maasha::UCSC::BED::bed_entry_put( $entry, $fh_out, $columns, $options->{ 'check' } );
# BLAST records are only handled when S_ID starts with "chr" (case-sensitive match,
# unlike the VMATCH branch below which uses /i).
134 elsif ( $record->{ "REC_TYPE" } eq "BLAST" and $record->{ "S_ID" } =~ /^chr/ )
# The BED score is derived from the BLAST bit score multiplied by 1000.
139 $record->{ "SCORE" } = $record->{ "BIT_SCORE" } * 1000;
141 if ( $entry = Maasha::UCSC::BED::biopiece2bed( $record, $columns ) ) {
142 Maasha::UCSC::BED::bed_entry_put( $entry, $fh_out, $columns, $options->{ 'check' } );
# VMATCH records are only handled when S_ID starts with "chr" (case-insensitive).
145 elsif ( $record->{ "REC_TYPE" } eq "VMATCH" and $record->{ "S_ID" } =~ /^chr/i )
150 if ( $entry = Maasha::UCSC::BED::biopiece2bed( $record, $columns ) ) {
151 Maasha::UCSC::BED::bed_entry_put( $entry, $fh_out, $columns, $options->{ 'check' } );
# Once $i equals the chunk size, the temporary file written so far is uploaded
# (BED or PSL only) and $file is opened for writing again.
# NOTE(review): where $i is incremented/reset and where $fh_out is closed is not
# visible in this view.
155 if ( $i == $options->{ "chunk_size" } )
159 if ( $format eq "BED" ) {
160 Maasha::UCSC::bed_upload_to_ucsc( $tmp_dir, $file, $options, $append );
161 } elsif ( $format eq "PSL" ) {
162 Maasha::UCSC::psl_upload_to_ucsc( $file, $options, $append );
171 $fh_out = Maasha::Filesys::file_write_open( $file );
# Final upload: send the temporary file to the UCSC database using the uploader that
# matches $format, set the track $type where applicable, then update the UCSC
# configuration and remove the temporary directory.
179 if ( $format eq "BED" )
181 $type = "bed $columns";
183 Maasha::UCSC::bed_upload_to_ucsc( $tmp_dir, $file, $options, $append );
185 elsif ( $format eq "BED_SS" )
# NOTE(review): this value starts with the word "type", whereas the BED branch above
# sets just "bed $columns" - check how ucsc_update_config uses $type; one of the two
# is likely inconsistent.
187 $type = "type bed 6 +";
189 Maasha::UCSC::bed_upload_to_ucsc( $tmp_dir, $file, $options, $append );
191 elsif ( $format eq "PSL" )
195 Maasha::UCSC::psl_upload_to_ucsc( $file, $options, $append );
197 elsif ( $format eq "WIGGLE" )
# Wiggle tracks always get "full" visibility, overriding any user-supplied value.
199 $options->{ "visibility" } = "full";
# The .wig/.wib file names are derived from the table name; wib files are kept
# under $HOME/ucsc/wib, which is created if it does not exist.
201 $wig_file = "$options->{ 'table' }.wig";
202 $wib_file = "$options->{ 'table' }.wib";
204 $wib_dir = "$ENV{ 'HOME' }/ucsc/wib";
206 Maasha::Filesys::dir_create_if_not_exists( $wib_dir );
# wigEncode is run from within the temporary directory via a shell (backticks).
# The second form additionally redirects stdout and stderr to /dev/null; it is
# presumably the non-verbose branch (the 'else' line is not visible here).
208 if ( $options->{ 'verbose' } ) {
209 `cd $tmp_dir && wigEncode $file $wig_file $wib_file`;
211 `cd $tmp_dir && wigEncode $file $wig_file $wib_file > /dev/null 2>&1`;
# Move the generated wib file into the wib directory.
# NOTE(review): source and destination are passed as one space-joined string -
# verify how Maasha::Common::run treats its second argument.
214 Maasha::Common::run( "mv", "$tmp_dir/$wib_file $wib_dir" );
222 Maasha::UCSC::wiggle_upload_to_ucsc( $tmp_dir, $wib_dir, $file, $options );
# NOTE(review): $type is only assigned in the BED and BED_SS branches visible above;
# for other formats it may be undef here unless set on lines not in this view.
227 Maasha::UCSC::ucsc_update_config( $options, $type );
229 Maasha::Filesys::dir_remove( $tmp_dir );
232 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
# Run-time bookkeeping: record the start time and log the invocation, then close
# both streams, record the end time and print the elapsed run time.
# NOTE(review): $run_time_beg and $run_time_end are not in the visible 'my' list,
# and the enclosing blocks (presumably BEGIN/END) are not visible in this view -
# confirm against the full file.
237 $run_time_beg = Maasha::Biopieces::run_time();
239 Maasha::Biopieces::log_biopiece();
245 Maasha::Biopieces::close_stream( $in );
246 Maasha::Biopieces::close_stream( $out );
248 $run_time_end = Maasha::Biopieces::run_time();
250 Maasha::Biopieces::run_time_print( $run_time_beg, $run_time_end, $options );
254 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<