3 # Copyright (C) 2007-2009 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
24 # Upload data to local UCSC Genome Browser Database.
26 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
32 use Maasha::Biopieces;
35 use Maasha::UCSC::Wiggle;
36 use Maasha::UCSC::PSL;
37 use Maasha::UCSC::BED;
40 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
# Declare all script-level variables, then parse the command-line switches.
#   Mandatory:  --database (-d), --table (-t).
#   Flags:      --no_stream (-x), --use_score (-u), --check (-C).
#   Defaults:   --group = $ENV{LOGNAME}, --priority = 1, --visibility = 'pack'
#               (visibility is restricted to hide, dense, squish, pack or full).
# NOTE(review): this listing does not show the opening/closing of the option
# list or the end of the parse_options() call - confirm against the full file.
43 my ( $options, $in, $out, $record, $tmp_dir, $file, $wib_file, $wig_file, $wib_dir, $fh_out, $i, $first, $format, $type, $columns, $append, $entry );
45 $options = Maasha::Biopieces::parse_options(
47 { long => 'database', short => 'd', type => 'string', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef },
48 { long => 'table', short => 't', type => 'string', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef },
49 { long => 'no_stream', short => 'x', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
50 { long => 'short_label', short => 's', type => 'string', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
51 { long => 'long_label', short => 'l', type => 'string', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
52 { long => 'group', short => 'g', type => 'string', mandatory => 'no', default => $ENV{ 'LOGNAME' }, allowed => undef, disallowed => undef },
53 { long => 'priority', short => 'p', type => 'float', mandatory => 'no', default => 1, allowed => undef, disallowed => undef },
54 { long => 'use_score', short => 'u', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
55 { long => 'visibility', short => 'V', type => 'string', mandatory => 'no', default => 'pack', allowed => 'hide,dense,squish,pack,full', disallowed => undef },
56 { long => 'color', short => 'c', type => 'string', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
57 { long => 'check', short => 'C', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
# Open the input and output record streams named by the stream_in / stream_out
# options, and obtain a temporary working directory.
61 $in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
62 $out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
64 $tmp_dir = Maasha::Biopieces::get_tmpdir();
# Fill in defaults. '||=' replaces any false value (undef, '' or 0), so both
# labels fall back to the table name when they are not given.
66 $options->{ "short_label" } ||= $options->{ 'table' };
67 $options->{ "long_label" } ||= $options->{ 'table' };
# Default color is a random "R,G,B" string.
# NOTE(review): int( rand( 255 ) ) yields 0..254, so a component is never 255;
# rand( 256 ) would cover the full 0..255 range - confirm whether this matters.
68 $options->{ "color" } ||= join( ",", int( rand( 255 ) ), int( rand( 255 ) ), int( rand( 255 ) ) );
# NOTE(review): no 'chunk_size' switch is declared in the visible option list,
# so this default presumably always applies - verify.
69 $options->{ "chunk_size" } ||= 10_000_000_000; # Due to 32-bit UCSC compilation really large tables cannot be loaded in one go.
# Records are converted into this temporary file before being uploaded.
71 $file = "$tmp_dir/ucsc_upload.tmp";
# NOTE(review): Maasha::Filesys is not among the 'use' lines visible in this
# listing - confirm it is loaded elsewhere.
76 $fh_out = Maasha::Filesys::file_write_open( $file );
# Main loop: read records from the input stream, optionally pass each one on
# to the output stream, and write a UCSC-loadable representation of it to the
# temporary file. The conversion is chosen from the record's REC_TYPE.
# NOTE(review): this listing is missing lines (block braces and, presumably,
# the assignments to $format, $columns, $i, $first and $append) - the comments
# below describe only the statements that are visible.
78 while ( $record = Maasha::Biopieces::get_record( $in ) )
# Echo the record downstream unless --no_stream was given.
80 Maasha::Biopieces::put_record( $record, $out ) if not $options->{ "no_stream" };
# fixed_step records: convert to a fixedStep wiggle entry and write it.
82 if ( $record->{ "REC_TYPE" } eq "fixed_step" )
86 if ( $entry = Maasha::UCSC::Wiggle::biopiece2fixedstep( $record ) ) {
87 Maasha::UCSC::Wiggle::fixedstep_entry_put( $entry, $fh_out );
# PSL records: write the PSL header when $first is true, then the entry itself.
90 elsif ( $record->{ "REC_TYPE" } eq "PSL" )
94 Maasha::UCSC::PSL::psl_put_header( $fh_out ) if $first;
95 Maasha::UCSC::PSL::psl_put_entry( $record, $fh_out );
# BED records carrying a SEC_STRUCT key: written by hand as a tab-separated
# row. Only some of the row's fields are visible in this listing.
# NOTE(review): CHR_END + 1 presumably converts an inclusive end coordinate to
# BED's exclusive end - confirm.
99 elsif ( $record->{ "REC_TYPE" } eq "BED" and $record->{ "SEC_STRUCT" } )
101 # chrom chromStart chromEnd name score strand size secStr conf
105 print $fh_out join ( "\t",
107 $record->{ "CHR_BEG" },
108 $record->{ "CHR_END" } + 1,
110 $record->{ "SCORE" },
111 $record->{ "STRAND" },
113 $record->{ "SEC_STRUCT" },
# Plain BED records: the column count is taken from the record's BED_COLS key.
117 elsif ( $record->{ "REC_TYPE" } eq "BED" )
120 $columns = $record->{ "BED_COLS" };
122 if ( $entry = Maasha::UCSC::BED::biopiece2bed( $record, $columns ) ) {
123 Maasha::UCSC::BED::bed_entry_put( $entry, $fh_out, $columns, $options->{ 'check' } );
# PATSCAN records that have a CHR key are converted to BED entries.
126 elsif ( $record->{ "REC_TYPE" } eq "PATSCAN" and $record->{ "CHR" } )
131 if ( $entry = Maasha::UCSC::BED::biopiece2bed( $record, $columns ) ) {
132 Maasha::UCSC::BED::bed_entry_put( $entry, $fh_out, $columns, $options->{ 'check' } );
# BLAST records whose subject id starts with "chr" are converted to BED.
# NOTE(review): this match is case-sensitive while the VMATCH branch below
# uses /^chr/i - confirm whether the difference is intended.
135 elsif ( $record->{ "REC_TYPE" } eq "BLAST" and $record->{ "S_ID" } =~ /^chr/ )
# SCORE is derived from BIT_SCORE scaled by 1000.
# NOTE(review): UCSC BED scores are documented as 0..1000 - verify that this
# scaling cannot exceed that range.
140 $record->{ "SCORE" } = $record->{ "BIT_SCORE" } * 1000;
142 if ( $entry = Maasha::UCSC::BED::biopiece2bed( $record, $columns ) ) {
143 Maasha::UCSC::BED::bed_entry_put( $entry, $fh_out, $columns, $options->{ 'check' } );
# VMATCH records whose subject id starts with "chr" (any case) are converted to BED.
146 elsif ( $record->{ "REC_TYPE" } eq "VMATCH" and $record->{ "S_ID" } =~ /^chr/i )
151 if ( $entry = Maasha::UCSC::BED::biopiece2bed( $record, $columns ) ) {
152 Maasha::UCSC::BED::bed_entry_put( $entry, $fh_out, $columns, $options->{ 'check' } );
# Chunked upload: when the counter $i equals chunk_size, upload what has been
# written so far and reopen the temporary file for writing. Only the BED and
# PSL formats are handled in the visible branch.
156 if ( $i == $options->{ "chunk_size" } )
160 if ( $format eq "BED" ) {
161 Maasha::UCSC::bed_upload_to_ucsc( $tmp_dir, $file, $options, $append );
162 } elsif ( $format eq "PSL" ) {
163 Maasha::UCSC::psl_upload_to_ucsc( $file, $options, $append );
172 $fh_out = Maasha::Filesys::file_write_open( $file );
# Final upload: send the remaining contents of the temporary file to the UCSC
# database with the uploader matching $format, and set $type for the track
# configuration written at the end.
# NOTE(review): block braces and some statements are missing from this listing.
180 if ( $format eq "BED" )
182 $type = "bed $columns";
184 Maasha::UCSC::bed_upload_to_ucsc( $tmp_dir, $file, $options, $append );
# BED with secondary structure columns.
# NOTE(review): unlike the plain BED branch, this $type string itself starts
# with the word "type" - confirm how ucsc_update_config() uses $type.
186 elsif ( $format eq "BED_SS" )
188 $type = "type bed 6 +";
190 Maasha::UCSC::bed_upload_to_ucsc( $tmp_dir, $file, $options, $append );
192 elsif ( $format eq "PSL" )
196 Maasha::UCSC::psl_upload_to_ucsc( $file, $options, $append );
# Wiggle tracks: visibility is forced to "full", overriding any --visibility value.
198 elsif ( $format eq "WIGGLE" )
200 $options->{ "visibility" } = "full";
# Output file names are derived from the table name; the .wib file is kept in
# $HOME/ucsc/wib, which is created if it does not exist.
202 $wig_file = "$options->{ 'table' }.wig";
203 $wib_file = "$options->{ 'table' }.wib";
205 $wib_dir = "$ENV{ 'HOME' }/ucsc/wib";
207 Maasha::Filesys::dir_create_if_not_exists( $wib_dir );
# Run wigEncode from inside the temporary directory; the second variant
# discards its stdout and stderr.
# NOTE(review): $tmp_dir, $file and the table-derived names are interpolated
# into a shell command unquoted, and the command's exit status is not checked.
# NOTE(review): no 'verbose' switch is declared in the visible option list -
# presumably parse_options() adds it; verify.
209 if ( $options->{ 'verbose' } ) {
210 `cd $tmp_dir && wigEncode $file $wig_file $wib_file`;
212 `cd $tmp_dir && wigEncode $file $wig_file $wib_file > /dev/null 2>&1`;
# Move the .wib file into $wib_dir. Source and destination are passed to
# Maasha::Common::run() as one space-separated string argument.
215 Maasha::Common::run( "mv", "$tmp_dir/$wib_file $wib_dir" );
223 Maasha::UCSC::wiggle_upload_to_ucsc( $tmp_dir, $wib_dir, $file, $options );
# Update the UCSC track configuration using the options and the $type chosen above.
228 Maasha::UCSC::ucsc_update_config( $options, $type );
# Close the input and output record streams once the upload is done.
230 Maasha::Biopieces::close_stream( $in );
231 Maasha::Biopieces::close_stream( $out );
234 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
# Biopieces status bookkeeping calls.
# NOTE(review): the context enclosing these two calls is not visible in this
# listing (presumably BEGIN/END blocks) - confirm against the full file.
239 Maasha::Biopieces::status_set();
245 Maasha::Biopieces::status_log();
249 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<