3 # Copyright (C) 2007-2009 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
24 # Format a genome creating specified indexes.
26 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
31 use Maasha::Biopieces;
37 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
# Main program flow:
#   1. parse the command line options,
#   2. create <dir>/genomes/<genome> and the data directory of each requested format,
#   3. write sequence records to <genome>.fna and phastcons records to <genome>.pp,
#      passing every record on to the output stream unless --no_stream is given,
#   4. close the data files and build the index for each requested format.

my $tmp_dir = Maasha::Biopieces::get_tmpdir();
my $default = $ENV{ 'BP_DATA' };
my $formats = 'fasta,blast,vmatch,phastcons';

# NOTE(review): the option specifications are passed as a single array
# reference - confirm against Maasha::Biopieces::parse_options.
my $options = Maasha::Biopieces::parse_options(
    [
        { long => 'no_stream', short => 'x', type => 'flag',   mandatory => 'no',  default => undef,    allowed => undef,    disallowed => undef },
        { long => 'dir',       short => 'd', type => 'dir!',   mandatory => 'no',  default => $default, allowed => undef,    disallowed => undef },
        { long => 'genome',    short => 'g', type => 'string', mandatory => 'yes', default => undef,    allowed => undef,    disallowed => undef },
        { long => 'formats',   short => 'f', type => 'list',   mandatory => 'yes', default => undef,    allowed => $formats, disallowed => '0' },
    ]
);

my $dir    = $options->{ 'dir' };
my $genome = $options->{ 'genome' };

# --dir defaults to $ENV{BP_DATA}; without either, every path below would be
# built from an undefined value and end up at the filesystem root.
if ( not defined $dir or $dir eq '' )
{
    die "No data directory: specify --dir or set the BP_DATA environment variable\n";
}

my $in  = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
my $out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );

my $genome_dir = "$dir/genomes/$genome";
my @requested  = @{ $options->{ "formats" } };

Maasha::Filesys::dir_create_if_not_exists( "$dir/genomes" );
Maasha::Filesys::dir_create_if_not_exists( $genome_dir );

# The FASTA file and the phastcons file get separate handles, and the two
# set-ups are independent of each other, so requesting e.g. "fasta,phastcons"
# prepares both outputs and $phastcons_dir is always defined when the
# phastcons index is built.
my ( $fasta_dir, $phastcons_dir, $fh_fasta, $fh_phastcons );

if ( grep { /fasta|blast|vmatch/i } @requested )
{
    $fasta_dir = "$genome_dir/fasta";

    # An existing <genome>.fna is reused as is: no handle is opened, so
    # incoming sequence records are not written and the file is not truncated.
    if ( not -f "$fasta_dir/$genome.fna" )
    {
        Maasha::Filesys::dir_create_if_not_exists( $fasta_dir );

        $fh_fasta = Maasha::Filesys::file_write_open( "$fasta_dir/$genome.fna" );
    }
}

if ( grep { /phastcons/i } @requested )
{
    $phastcons_dir = "$genome_dir/phastcons";

    Maasha::Filesys::dir_create_if_not_exists( $phastcons_dir );

    $fh_phastcons = Maasha::Filesys::file_write_open( "$phastcons_dir/$genome.pp" );
}

while ( my $record = Maasha::Biopieces::get_record( $in ) )
{
    my $entry;

    if ( $fh_fasta and $entry = Maasha::Fasta::biopiece2fasta( $record ) )
    {
        Maasha::Fasta::put_entry( $entry, $fh_fasta );
    }
    elsif ( $fh_phastcons and $record->{ "CHR" } and $record->{ "CHR_BEG" } and $record->{ "STEP" } and $record->{ "VALS" } )
    {
        print {$fh_phastcons} "fixedStep chrom=$record->{ 'CHR' } start=$record->{ 'CHR_BEG' } step=$record->{ 'STEP' }\n";

        # NOTE(review): fixedStep data lines hold one value each; if VALS is a
        # delimited list it has to be split onto separate lines here - confirm
        # the VALS format produced upstream.
        my $vals = $record->{ 'VALS' };

        print {$fh_phastcons} "$vals\n";
    }

    Maasha::Biopieces::put_record( $record, $out ) if not $options->{ "no_stream" };
}

# Close the data files BEFORE indexing: the index builders read these files,
# and data still sitting in the write buffer would otherwise be missing.
# Buffered write errors are reported by close, so its result is checked.
foreach my $fh ( grep { defined } $fh_fasta, $fh_phastcons )
{
    close $fh or die "Failed closing genome data file: $!\n";
}

foreach my $format ( @requested )
{
    if    ( $format =~ /^fasta$/i )     { Maasha::Fasta::fasta_index( "$fasta_dir/$genome.fna", "$fasta_dir/$genome.index" ) }
    elsif ( $format =~ /^blast$/i )     { Maasha::NCBI::blast_index( "$genome.fna", $fasta_dir, "$genome_dir/blast", "dna", $genome ) }
    elsif ( $format =~ /^blat$/i )      { print STDERR "BLAT FORMAT NOT IMPLEMENTED" }
    elsif ( $format =~ /^vmatch$/i )    { Maasha::Match::vmatch_index( "$genome.fna", $fasta_dir, "$genome_dir/vmatch", $tmp_dir ) }
    elsif ( $format =~ /^phastcons$/i ) { Maasha::UCSC::phastcons_index( "$genome.pp", $phastcons_dir ) }
}

Maasha::Biopieces::close_stream( $in );
Maasha::Biopieces::close_stream( $out );
124 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
# Status bookkeeping, tied to the interpreter phases instead of running as the
# last two statements of the script:
#   BEGIN - status_set() runs before any of the main program executes,
#   END   - status_log() runs when the interpreter shuts down, which includes
#           exits caused by die.
# NOTE(review): presumably status_set() records the start state that
# status_log() later reports - confirm against Maasha::Biopieces.

BEGIN
{
    Maasha::Biopieces::status_set();
}


END
{
    Maasha::Biopieces::status_log();
}
139 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<