From: martinahansen Date: Mon, 14 Jul 2008 08:32:34 +0000 (+0000) Subject: added phastcons to format_genome X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=4f7a684fc91d1759b00d78ac98bc4a3fddff9748;p=biopieces.git added phastcons to format_genome git-svn-id: http://biopieces.googlecode.com/svn/trunk@149 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/code_perl/Maasha/Biopieces.pm b/code_perl/Maasha/Biopieces.pm index d4ccc17..2a7ef7d 100644 --- a/code_perl/Maasha/Biopieces.pm +++ b/code_perl/Maasha/Biopieces.pm @@ -2020,7 +2020,7 @@ sub script_format_genome # Returns nothing. - my ( $dir, $genome, $fasta_dir, $fh_out, $record, $format, $index ); + my ( $dir, $genome, $fasta_dir, $phastcons_dir, $vals, $fh_out, $record, $format, $index ); $dir = $options->{ 'dir' } || $ENV{ 'BP_DATA' }; $genome = $options->{ 'genome' }; @@ -2029,40 +2029,57 @@ sub script_format_genome Maasha::Common::dir_create_if_not_exists( "$dir/genomes" ); Maasha::Common::dir_create_if_not_exists( "$dir/genomes/$genome" ); - if ( -f "$dir/genomes/$genome/fasta/$genome.fna" ) + if ( grep { $_ =~ /fasta|blast|vmatch/i } @{ $options->{ "formats" } } ) { - $fasta_dir = "$dir/genomes/$genome/fasta"; - } - elsif ( grep { $_ =~ /fasta/i } @{ $options->{ "formats" } } ) - { - Maasha::Common::dir_create_if_not_exists( "$dir/genomes/$genome/fasta" ); + if ( -f "$dir/genomes/$genome/fasta/$genome.fna" ) + { + $fasta_dir = "$dir/genomes/$genome/fasta"; + } + else + { + Maasha::Common::dir_create_if_not_exists( "$dir/genomes/$genome/fasta" ); - $fasta_dir = "$dir/genomes/$genome/fasta"; + $fasta_dir = "$dir/genomes/$genome/fasta"; - $fh_out = Maasha::Common::write_open( "$fasta_dir/$genome.fna" ); + $fh_out = Maasha::Common::write_open( "$fasta_dir/$genome.fna" ); + } } - else + elsif ( grep { $_ =~ /phastcons/i } @{ $options->{ "formats" } } ) { - $fasta_dir = $BP_TMP; + Maasha::Common::dir_create_if_not_exists( "$dir/genomes/$genome/phastcons" ); + + $phastcons_dir = "$dir/genomes/$genome/phastcons"; - $fh_out = Maasha::Common::write_open( "$fasta_dir/$genome.fna" ); + $fh_out = Maasha::Common::write_open( "$phastcons_dir/$genome.pp" ); } while ( $record = get_record( $in ) ) { - if ( $fh_out and $record->{ "SEQ_NAME" } and $record->{ "SEQ" } ) { + if ( $fh_out and $record->{ "SEQ_NAME" } and $record->{ "SEQ" } ) + { Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh_out, $options->{ "wrap" } ); } + elsif ( $fh_out and $record->{ "CHR" } and $record->{ "CHR_BEG" } and $record->{ "STEP" } and $record->{ "VALS" } ) + { + print $fh_out "fixedStep chrom=$record->{ 'CHR' } start=$record->{ 'CHR_BEG' } step=$record->{ 'STEP' }\n"; + + $vals = $record->{ 'VALS' }; + + $vals =~ tr/,/\n/; + + print $fh_out "$vals\n"; + } put_record( $record, $out ) if not $options->{ "no_stream" }; } foreach $format ( @{ $options->{ 'formats' } } ) { - if ( $format =~ /^fasta$/i ) { Maasha::Fasta::fasta_index( "$fasta_dir/$genome.fna", "$dir/genomes/$genome/fasta/$genome.index" ) } - elsif ( $format =~ /^blast$/i ) { Maasha::NCBI::blast_index( "$genome.fna", $fasta_dir, "$dir/genomes/$genome/blast", "dna", $genome ) } - elsif ( $format =~ /^blat$/i ) { print STDERR "BLAT FORMAT NOT IMPLEMENTED" } - elsif ( $format =~ /^vmatch$/i ) { Maasha::Match::vmatch_index( "$genome.fna", $fasta_dir, "$dir/genomes/$genome/vmatch", $BP_TMP ) } + if ( $format =~ /^fasta$/i ) { Maasha::Fasta::fasta_index( "$fasta_dir/$genome.fna", "$dir/genomes/$genome/fasta/$genome.index" ) } + elsif ( $format =~ /^blast$/i ) { Maasha::NCBI::blast_index( "$genome.fna", $fasta_dir, "$dir/genomes/$genome/blast", "dna", $genome ) } + elsif ( $format =~ /^blat$/i ) { print STDERR "BLAT FORMAT NOT IMPLEMENTED" } + elsif ( $format =~ /^vmatch$/i ) { Maasha::Match::vmatch_index( "$genome.fna", $fasta_dir, "$dir/genomes/$genome/vmatch", $BP_TMP ) } + elsif ( $format =~ /^phastcons$/i ) { Maasha::UCSC::phastcons_index( "$genome.pp", $phastcons_dir ) } } close $fh_out if $fh_out; diff --git a/code_perl/Maasha/UCSC.pm b/code_perl/Maasha/UCSC.pm index 239dea1..dfc5255 100644 --- a/code_perl/Maasha/UCSC.pm +++ b/code_perl/Maasha/UCSC.pm @@ -1098,6 +1098,26 @@ sub fixedStep_index_lookup # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PhastCons format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +sub phastcons_index +{ + # Martin A. Hansen, July 2008 + + # Create a fixedStep index for PhastCons data. + + my ( $file, # file to index + $dir, # dir with file + ) = @_; + + # Returns nothing. + + my ( $index ); + + $index = fixedstep_index_create( "$dir/$file" ); + + fixedstep_index_store( "$dir/$file.index", $index ); +} + + sub phastcons_parse_entry { # Martin A. Hansen, December 2007.