From: martinahansen Date: Mon, 14 Jul 2008 05:04:20 +0000 (+0000) Subject: added read_fixedstep X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=5d5968148bb0f5e97f6ff73cfdf77a8269f57343;p=biopieces.git added read_fixedstep git-svn-id: http://biopieces.googlecode.com/svn/trunk@144 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/read_fixedstep b/bp_bin/read_fixedstep new file mode 100755 index 0000000..4cd1d44 --- /dev/null +++ b/bp_bin/read_fixedstep @@ -0,0 +1,6 @@ +#!/usr/bin/env perl + +use warnings; +use strict; + +use Maasha::Biopieces; diff --git a/code_perl/Maasha/Biopieces.pm b/code_perl/Maasha/Biopieces.pm index 8c9358b..aac2215 100644 --- a/code_perl/Maasha/Biopieces.pm +++ b/code_perl/Maasha/Biopieces.pm @@ -164,93 +164,94 @@ sub run_script $in = read_stream( $options->{ "stream_in" } ); $out = write_stream( $options->{ "stream_out" } ); - if ( $script eq "print_usage" ) { script_print_usage( $in, $out, $options ) } - elsif ( $script eq "list_biopieces" ) { script_list_biopieces( $in, $out, $options ) } - elsif ( $script eq "list_genomes" ) { script_list_genomes( $in, $out, $options ) } - elsif ( $script eq "read_fasta" ) { script_read_fasta( $in, $out, $options ) } - elsif ( $script eq "read_tab" ) { script_read_tab( $in, $out, $options ) } - elsif ( $script eq "read_psl" ) { script_read_psl( $in, $out, $options ) } - elsif ( $script eq "read_bed" ) { script_read_bed( $in, $out, $options ) } - elsif ( $script eq "read_blast_tab" ) { script_read_blast_tab( $in, $out, $options ) } - elsif ( $script eq "read_embl" ) { script_read_embl( $in, $out, $options ) } - elsif ( $script eq "read_stockholm" ) { script_read_stockholm( $in, $out, $options ) } - elsif ( $script eq "read_phastcons" ) { script_read_phastcons( $in, $out, $options ) } - elsif ( $script eq "read_soft" ) { script_read_soft( $in, $out, $options ) } - elsif ( $script eq "read_gff" ) { script_read_gff( $in, $out, $options ) } - elsif ( $script eq "read_2bit" ) { 
script_read_2bit( $in, $out, $options ) } - elsif ( $script eq "read_solexa" ) { script_read_solexa( $in, $out, $options ) } - elsif ( $script eq "read_solid" ) { script_read_solid( $in, $out, $options ) } - elsif ( $script eq "read_mysql" ) { script_read_mysql( $in, $out, $options ) } - elsif ( $script eq "format_genome" ) { script_format_genome( $in, $out, $options ) } - elsif ( $script eq "length_seq" ) { script_length_seq( $in, $out, $options ) } - elsif ( $script eq "uppercase_seq" ) { script_uppercase_seq( $in, $out, $options ) } - elsif ( $script eq "shuffle_seq" ) { script_shuffle_seq( $in, $out, $options ) } - elsif ( $script eq "analyze_seq" ) { script_analyze_seq( $in, $out, $options ) } - elsif ( $script eq "analyze_tags" ) { script_analyze_tags( $in, $out, $options ) } - elsif ( $script eq "complexity_seq" ) { script_complexity_seq( $in, $out, $options ) } - elsif ( $script eq "oligo_freq" ) { script_oligo_freq( $in, $out, $options ) } - elsif ( $script eq "create_weight_matrix" ) { script_create_weight_matrix( $in, $out, $options ) } - elsif ( $script eq "calc_bit_scores" ) { script_calc_bit_scores( $in, $out, $options ) } - elsif ( $script eq "reverse_seq" ) { script_reverse_seq( $in, $out, $options ) } - elsif ( $script eq "complement_seq" ) { script_complement_seq( $in, $out, $options ) } - elsif ( $script eq "remove_indels" ) { script_remove_indels( $in, $out, $options ) } - elsif ( $script eq "transliterate_seq" ) { script_transliterate_seq( $in, $out, $options ) } - elsif ( $script eq "transliterate_vals" ) { script_transliterate_vals( $in, $out, $options ) } - elsif ( $script eq "translate_seq" ) { script_translate_seq( $in, $out, $options ) } - elsif ( $script eq "extract_seq" ) { script_extract_seq( $in, $out, $options ) } - elsif ( $script eq "get_genome_seq" ) { script_get_genome_seq( $in, $out, $options ) } - elsif ( $script eq "get_genome_align" ) { script_get_genome_align( $in, $out, $options ) } - elsif ( $script eq 
"get_genome_phastcons" ) { script_get_genome_phastcons( $in, $out, $options ) } - elsif ( $script eq "fold_seq" ) { script_fold_seq( $in, $out, $options ) } - elsif ( $script eq "split_seq" ) { script_split_seq( $in, $out, $options ) } - elsif ( $script eq "split_bed" ) { script_split_bed( $in, $out, $options ) } - elsif ( $script eq "align_seq" ) { script_align_seq( $in, $out, $options ) } - elsif ( $script eq "tile_seq" ) { script_tile_seq( $in, $out, $options ) } - elsif ( $script eq "invert_align" ) { script_invert_align( $in, $out, $options ) } - elsif ( $script eq "patscan_seq" ) { script_patscan_seq( $in, $out, $options ) } - elsif ( $script eq "create_blast_db" ) { script_create_blast_db( $in, $out, $options ) } - elsif ( $script eq "blast_seq" ) { script_blast_seq( $in, $out, $options ) } - elsif ( $script eq "blat_seq" ) { script_blat_seq( $in, $out, $options ) } - elsif ( $script eq "match_seq" ) { script_match_seq( $in, $out, $options ) } - elsif ( $script eq "create_vmatch_index" ) { script_create_vmatch_index( $in, $out, $options ) } - elsif ( $script eq "vmatch_seq" ) { script_vmatch_seq( $in, $out, $options ) } - elsif ( $script eq "write_fasta" ) { script_write_fasta( $in, $out, $options, $options ) } - elsif ( $script eq "write_align" ) { script_write_align( $in, $out, $options ) } - elsif ( $script eq "write_blast" ) { script_write_blast( $in, $out, $options ) } - elsif ( $script eq "write_tab" ) { script_write_tab( $in, $out, $options ) } - elsif ( $script eq "write_bed" ) { script_write_bed( $in, $out, $options ) } - elsif ( $script eq "write_psl" ) { script_write_psl( $in, $out, $options ) } - elsif ( $script eq "write_2bit" ) { script_write_2bit( $in, $out, $options, $options ) } - elsif ( $script eq "write_solid" ) { script_write_solid( $in, $out, $options, $options ) } - elsif ( $script eq "head_records" ) { script_head_records( $in, $out, $options ) } - elsif ( $script eq "remove_keys" ) { script_remove_keys( $in, $out, $options ) } - 
elsif ( $script eq "rename_keys" ) { script_rename_keys( $in, $out, $options ) } - elsif ( $script eq "uniq_vals" ) { script_uniq_vals( $in, $out, $options ) } - elsif ( $script eq "merge_vals" ) { script_merge_vals( $in, $out, $options ) } - elsif ( $script eq "grab" ) { script_grab( $in, $out, $options ) } - elsif ( $script eq "compute" ) { script_compute( $in, $out, $options ) } - elsif ( $script eq "flip_tab" ) { script_flip_tab( $in, $out, $options ) } - elsif ( $script eq "add_ident" ) { script_add_ident( $in, $out, $options ) } - elsif ( $script eq "count_records" ) { script_count_records( $in, $out, $options ) } - elsif ( $script eq "random_records" ) { script_random_records( $in, $out, $options ) } - elsif ( $script eq "sort_records" ) { script_sort_records( $in, $out, $options ) } - elsif ( $script eq "count_vals" ) { script_count_vals( $in, $out, $options ) } - elsif ( $script eq "plot_histogram" ) { script_plot_histogram( $in, $out, $options ) } - elsif ( $script eq "plot_lendist" ) { script_plot_lendist( $in, $out, $options ) } - elsif ( $script eq "plot_chrdist" ) { script_plot_chrdist( $in, $out, $options ) } - elsif ( $script eq "plot_karyogram" ) { script_plot_karyogram( $in, $out, $options ) } - elsif ( $script eq "plot_matches" ) { script_plot_matches( $in, $out, $options ) } - elsif ( $script eq "plot_seqlogo" ) { script_plot_seqlogo( $in, $out, $options ) } - elsif ( $script eq "plot_phastcons_profiles" ) { script_plot_phastcons_profiles( $in, $out, $options ) } - elsif ( $script eq "analyze_bed" ) { script_analyze_bed( $in, $out, $options ) } - elsif ( $script eq "analyze_vals" ) { script_analyze_vals( $in, $out, $options ) } - elsif ( $script eq "length_vals" ) { script_length_vals( $in, $out, $options ) } - elsif ( $script eq "sum_vals" ) { script_sum_vals( $in, $out, $options ) } - elsif ( $script eq "mean_vals" ) { script_mean_vals( $in, $out, $options ) } - elsif ( $script eq "median_vals" ) { script_median_vals( $in, $out, $options ) } - 
elsif ( $script eq "max_vals" ) { script_max_vals( $in, $out, $options ) } - elsif ( $script eq "min_vals" ) { script_min_vals( $in, $out, $options ) } - elsif ( $script eq "upload_to_ucsc" ) { script_upload_to_ucsc( $in, $out, $options ) } + if ( $script eq "print_usage" ) { script_print_usage( $in, $out, $options ) } + elsif ( $script eq "list_biopieces" ) { script_list_biopieces( $in, $out, $options ) } + elsif ( $script eq "list_genomes" ) { script_list_genomes( $in, $out, $options ) } + elsif ( $script eq "read_fasta" ) { script_read_fasta( $in, $out, $options ) } + elsif ( $script eq "read_tab" ) { script_read_tab( $in, $out, $options ) } + elsif ( $script eq "read_psl" ) { script_read_psl( $in, $out, $options ) } + elsif ( $script eq "read_bed" ) { script_read_bed( $in, $out, $options ) } + elsif ( $script eq "read_fixedstep" ) { script_read_fixedstep( $in, $out, $options ) } + elsif ( $script eq "read_blast_tab" ) { script_read_blast_tab( $in, $out, $options ) } + elsif ( $script eq "read_embl" ) { script_read_embl( $in, $out, $options ) } + elsif ( $script eq "read_stockholm" ) { script_read_stockholm( $in, $out, $options ) } + elsif ( $script eq "read_phastcons" ) { script_read_phastcons( $in, $out, $options ) } + elsif ( $script eq "read_soft" ) { script_read_soft( $in, $out, $options ) } + elsif ( $script eq "read_gff" ) { script_read_gff( $in, $out, $options ) } + elsif ( $script eq "read_2bit" ) { script_read_2bit( $in, $out, $options ) } + elsif ( $script eq "read_solexa" ) { script_read_solexa( $in, $out, $options ) } + elsif ( $script eq "read_solid" ) { script_read_solid( $in, $out, $options ) } + elsif ( $script eq "read_mysql" ) { script_read_mysql( $in, $out, $options ) } + elsif ( $script eq "format_genome" ) { script_format_genome( $in, $out, $options ) } + elsif ( $script eq "length_seq" ) { script_length_seq( $in, $out, $options ) } + elsif ( $script eq "uppercase_seq" ) { script_uppercase_seq( $in, $out, $options ) } + elsif ( $script eq 
"shuffle_seq" ) { script_shuffle_seq( $in, $out, $options ) } + elsif ( $script eq "analyze_seq" ) { script_analyze_seq( $in, $out, $options ) } + elsif ( $script eq "analyze_tags" ) { script_analyze_tags( $in, $out, $options ) } + elsif ( $script eq "complexity_seq" ) { script_complexity_seq( $in, $out, $options ) } + elsif ( $script eq "oligo_freq" ) { script_oligo_freq( $in, $out, $options ) } + elsif ( $script eq "create_weight_matrix" ) { script_create_weight_matrix( $in, $out, $options ) } + elsif ( $script eq "calc_bit_scores" ) { script_calc_bit_scores( $in, $out, $options ) } + elsif ( $script eq "reverse_seq" ) { script_reverse_seq( $in, $out, $options ) } + elsif ( $script eq "complement_seq" ) { script_complement_seq( $in, $out, $options ) } + elsif ( $script eq "remove_indels" ) { script_remove_indels( $in, $out, $options ) } + elsif ( $script eq "transliterate_seq" ) { script_transliterate_seq( $in, $out, $options ) } + elsif ( $script eq "transliterate_vals" ) { script_transliterate_vals( $in, $out, $options ) } + elsif ( $script eq "translate_seq" ) { script_translate_seq( $in, $out, $options ) } + elsif ( $script eq "extract_seq" ) { script_extract_seq( $in, $out, $options ) } + elsif ( $script eq "get_genome_seq" ) { script_get_genome_seq( $in, $out, $options ) } + elsif ( $script eq "get_genome_align" ) { script_get_genome_align( $in, $out, $options ) } + elsif ( $script eq "get_genome_phastcons" ) { script_get_genome_phastcons( $in, $out, $options ) } + elsif ( $script eq "fold_seq" ) { script_fold_seq( $in, $out, $options ) } + elsif ( $script eq "split_seq" ) { script_split_seq( $in, $out, $options ) } + elsif ( $script eq "split_bed" ) { script_split_bed( $in, $out, $options ) } + elsif ( $script eq "align_seq" ) { script_align_seq( $in, $out, $options ) } + elsif ( $script eq "tile_seq" ) { script_tile_seq( $in, $out, $options ) } + elsif ( $script eq "invert_align" ) { script_invert_align( $in, $out, $options ) } + elsif ( $script eq 
"patscan_seq" ) { script_patscan_seq( $in, $out, $options ) } + elsif ( $script eq "create_blast_db" ) { script_create_blast_db( $in, $out, $options ) } + elsif ( $script eq "blast_seq" ) { script_blast_seq( $in, $out, $options ) } + elsif ( $script eq "blat_seq" ) { script_blat_seq( $in, $out, $options ) } + elsif ( $script eq "match_seq" ) { script_match_seq( $in, $out, $options ) } + elsif ( $script eq "create_vmatch_index" ) { script_create_vmatch_index( $in, $out, $options ) } + elsif ( $script eq "vmatch_seq" ) { script_vmatch_seq( $in, $out, $options ) } + elsif ( $script eq "write_fasta" ) { script_write_fasta( $in, $out, $options ) } + elsif ( $script eq "write_align" ) { script_write_align( $in, $out, $options ) } + elsif ( $script eq "write_blast" ) { script_write_blast( $in, $out, $options ) } + elsif ( $script eq "write_tab" ) { script_write_tab( $in, $out, $options ) } + elsif ( $script eq "write_bed" ) { script_write_bed( $in, $out, $options ) } + elsif ( $script eq "write_psl" ) { script_write_psl( $in, $out, $options ) } + elsif ( $script eq "write_2bit" ) { script_write_2bit( $in, $out, $options ) } + elsif ( $script eq "write_solid" ) { script_write_solid( $in, $out, $options ) } + elsif ( $script eq "head_records" ) { script_head_records( $in, $out, $options ) } + elsif ( $script eq "remove_keys" ) { script_remove_keys( $in, $out, $options ) } + elsif ( $script eq "rename_keys" ) { script_rename_keys( $in, $out, $options ) } + elsif ( $script eq "uniq_vals" ) { script_uniq_vals( $in, $out, $options ) } + elsif ( $script eq "merge_vals" ) { script_merge_vals( $in, $out, $options ) } + elsif ( $script eq "grab" ) { script_grab( $in, $out, $options ) } + elsif ( $script eq "compute" ) { script_compute( $in, $out, $options ) } + elsif ( $script eq "flip_tab" ) { script_flip_tab( $in, $out, $options ) } + elsif ( $script eq "add_ident" ) { script_add_ident( $in, $out, $options ) } + elsif ( $script eq "count_records" ) { script_count_records( $in, 
$out, $options ) } + elsif ( $script eq "random_records" ) { script_random_records( $in, $out, $options ) } + elsif ( $script eq "sort_records" ) { script_sort_records( $in, $out, $options ) } + elsif ( $script eq "count_vals" ) { script_count_vals( $in, $out, $options ) } + elsif ( $script eq "plot_histogram" ) { script_plot_histogram( $in, $out, $options ) } + elsif ( $script eq "plot_lendist" ) { script_plot_lendist( $in, $out, $options ) } + elsif ( $script eq "plot_chrdist" ) { script_plot_chrdist( $in, $out, $options ) } + elsif ( $script eq "plot_karyogram" ) { script_plot_karyogram( $in, $out, $options ) } + elsif ( $script eq "plot_matches" ) { script_plot_matches( $in, $out, $options ) } + elsif ( $script eq "plot_seqlogo" ) { script_plot_seqlogo( $in, $out, $options ) } + elsif ( $script eq "plot_phastcons_profiles" ) { script_plot_phastcons_profiles( $in, $out, $options ) } + elsif ( $script eq "analyze_bed" ) { script_analyze_bed( $in, $out, $options ) } + elsif ( $script eq "analyze_vals" ) { script_analyze_vals( $in, $out, $options ) } + elsif ( $script eq "length_vals" ) { script_length_vals( $in, $out, $options ) } + elsif ( $script eq "sum_vals" ) { script_sum_vals( $in, $out, $options ) } + elsif ( $script eq "mean_vals" ) { script_mean_vals( $in, $out, $options ) } + elsif ( $script eq "median_vals" ) { script_median_vals( $in, $out, $options ) } + elsif ( $script eq "max_vals" ) { script_max_vals( $in, $out, $options ) } + elsif ( $script eq "min_vals" ) { script_min_vals( $in, $out, $options ) } + elsif ( $script eq "upload_to_ucsc" ) { script_upload_to_ucsc( $in, $out, $options ) } close $in if defined $in; close $out; @@ -311,6 +312,13 @@ sub get_options num|n=s ); } + elsif ( $script eq "read_fixedstep" ) + { + @options = qw( + data_in|i=s + num|n=s + ); + } elsif ( $script eq "read_blast_tab" ) { @options = qw( @@ -1346,6 +1354,90 @@ sub script_read_bed }
+sub script_read_fixedstep
+{
+    # Martin A. Hansen, July 2008.
+
+    # Read fixedStep wiggle format from stream or file.
+
+    # Records from the in stream are passed through unchanged. Every
+    # fixedStep entry found in the files listed in $options->{ "files" }
+    # is then emitted as one record holding CHR, CHR_BEG, STEP and VALS
+    # (the remaining lines of the entry joined by commas), plus SPAN when
+    # the header declares one. If $options->{ "num" } is set, reading
+    # stops once that many records have been emitted.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $num, $done );
+
+    while ( $record = get_record( $in ) ) {
+        put_record( $record, $out );
+    }
+
+    $num  = 0;
+    $done = 0;
+
+    foreach my $file ( @{ $options->{ "files" } || [] } )
+    {
+        my $data_in = Maasha::Common::read_open( $file );
+
+        while ( my $entry = Maasha::UCSC::fixedstep_get_entry( $data_in ) )
+        {
+            my $head = shift @{ $entry };
+
+            # The leading "fixedStep" keyword is accepted but not required,
+            # because this sub cannot tell whether fixedstep_get_entry keeps
+            # it on the header line. The span field is optional.
+            my ( $chr, $chr_beg, $step, $span );
+
+            if ( defined $head and $head =~ /^(?:fixedStep )?chrom=([^ ]+) start=(\d+) step=(\d+)(?: span=(\d+))?$/ )
+            {
+                ( $chr, $chr_beg, $step, $span ) = ( $1, $2, $3, $4 );
+            }
+            else
+            {
+                # Never emit a record for a header that failed to parse.
+                warn qq(WARNING: skipping fixedStep entry with unparsable header in file "$file"\n);
+
+                next;
+            }
+
+            # Fresh hash per entry, so no keys leak from a previous record.
+            $record = {
+                "CHR"     => $chr,
+                "CHR_BEG" => $chr_beg,
+                "STEP"    => $step,
+                "VALS"    => join( ",", @{ $entry } ),
+            };
+
+            $record->{ "SPAN" } = $span if defined $span;
+
+            put_record( $record, $out );
+
+            $num++;
+
+            if ( $options->{ "num" } and $num == $options->{ "num" } )
+            {
+                $done = 1;
+
+                last;
+            }
+        }
+
+        # Each handle is closed exactly once, also when the limit was hit.
+        close $data_in;
+
+        last if $done;
+    }
+}
+
+
 sub script_read_blast_tab { # Martin A. Hansen, September 2007. @@ -1592,7 +1684,7 @@ sub script_read_phastcons { $data_in = Maasha::Common::read_open( $file ); - while ( $entry = Maasha::UCSC::phastcons_get_entry( $data_in ) ) + while ( $entry = Maasha::UCSC::fixedstep_get_entry( $data_in ) ) { @records = Maasha::UCSC::phastcons_parse_entry( $entry, $options ); diff --git a/code_perl/Maasha/UCSC.pm b/code_perl/Maasha/UCSC.pm index 8e02dde..4f932e3 100644 --- a/code_perl/Maasha/UCSC.pm +++ b/code_perl/Maasha/UCSC.pm @@ -834,10 +834,10 @@ sub update_my_tracks } -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PhastCons format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> fixedStep format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -sub phastcons_get_entry +sub fixedstep_get_entry { # Martin A. Hansen, December 2007. 
@@ -868,6 +868,9 @@ sub phastcons_get_entry } +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PhastCons format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + sub phastcons_parse_entry { # Martin A. Hansen, December 2007. @@ -1023,7 +1026,7 @@ sub phastcons_index_create $pos = 0; - while ( $entry = Maasha::UCSC::phastcons_get_entry( $fh ) ) + while ( $entry = Maasha::UCSC::fixedstep_get_entry( $fh ) ) { $locator = shift @{ $entry };