# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> LOG <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-my $log_fh = Maasha::Common::append_open( "$ENV{ 'BP_TMP' }/biopieces.log" );
+my $log_global = Maasha::Common::append_open( "$ENV{ 'BP_LOG' }/biopieces.log" );
+my $log_local = Maasha::Common::append_open( "$ENV{ 'HOME' }/.biopieces.log" );
-$log_fh->autoflush( 1 );
+$log_global->autoflush( 1 );
+$log_local->autoflush( 1 );
-&log( $log_fh, $script, \@ARGV );
+&log( $log_global, $script, \@ARGV );
+&log( $log_local, $script, \@ARGV );
-close $log_fh;
+close $log_global;
+close $log_local;
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> RUN SCRIPT <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$options = get_options( $script );
- $script = "print_usage" if ( -t STDIN and not @ARGV and $script ne "list_biopieces" );
-
- $in = read_stream( $options->{ "stream_in" } );
- $out = write_stream( $options->{ "stream_out" } );
-
- if ( $script eq "print_usage" ) { script_print_usage( $in, $out, $options ) }
- elsif ( $script eq "list_biopieces" ) { script_list_biopieces( $in, $out, $options ) }
- elsif ( $script eq "read_fasta" ) { script_read_fasta( $in, $out, $options ) }
- elsif ( $script eq "read_tab" ) { script_read_tab( $in, $out, $options ) }
- elsif ( $script eq "read_psl" ) { script_read_psl( $in, $out, $options ) }
- elsif ( $script eq "read_bed" ) { script_read_bed( $in, $out, $options ) }
- elsif ( $script eq "read_blast_tab" ) { script_read_blast_tab( $in, $out, $options ) }
- elsif ( $script eq "read_embl" ) { script_read_embl( $in, $out, $options ) }
- elsif ( $script eq "read_stockholm" ) { script_read_stockholm( $in, $out, $options ) }
- elsif ( $script eq "read_phastcons" ) { script_read_phastcons( $in, $out, $options ) }
- elsif ( $script eq "read_soft" ) { script_read_soft( $in, $out, $options ) }
- elsif ( $script eq "read_gff" ) { script_read_gff( $in, $out, $options ) }
- elsif ( $script eq "read_2bit" ) { script_read_2bit( $in, $out, $options ) }
- elsif ( $script eq "read_solexa" ) { script_read_solexa( $in, $out, $options ) }
- elsif ( $script eq "read_solid" ) { script_read_solid( $in, $out, $options ) }
- elsif ( $script eq "read_mysql" ) { script_read_mysql( $in, $out, $options ) }
- elsif ( $script eq "length_seq" ) { script_length_seq( $in, $out, $options ) }
- elsif ( $script eq "uppercase_seq" ) { script_uppercase_seq( $in, $out, $options ) }
- elsif ( $script eq "shuffle_seq" ) { script_shuffle_seq( $in, $out, $options ) }
- elsif ( $script eq "analyze_seq" ) { script_analyze_seq( $in, $out, $options ) }
- elsif ( $script eq "analyze_tags" ) { script_analyze_tags( $in, $out, $options ) }
- elsif ( $script eq "complexity_seq" ) { script_complexity_seq( $in, $out, $options ) }
- elsif ( $script eq "oligo_freq" ) { script_oligo_freq( $in, $out, $options ) }
- elsif ( $script eq "create_weight_matrix" ) { script_create_weight_matrix( $in, $out, $options ) }
- elsif ( $script eq "calc_bit_scores" ) { script_calc_bit_scores( $in, $out, $options ) }
- elsif ( $script eq "reverse_seq" ) { script_reverse_seq( $in, $out, $options ) }
- elsif ( $script eq "complement_seq" ) { script_complement_seq( $in, $out, $options ) }
- elsif ( $script eq "remove_indels" ) { script_remove_indels( $in, $out, $options ) }
- elsif ( $script eq "transliterate_seq" ) { script_transliterate_seq( $in, $out, $options ) }
- elsif ( $script eq "transliterate_vals" ) { script_transliterate_vals( $in, $out, $options ) }
- elsif ( $script eq "translate_seq" ) { script_translate_seq( $in, $out, $options ) }
- elsif ( $script eq "extract_seq" ) { script_extract_seq( $in, $out, $options ) }
- elsif ( $script eq "get_genome_seq" ) { script_get_genome_seq( $in, $out, $options ) }
- elsif ( $script eq "get_genome_align" ) { script_get_genome_align( $in, $out, $options ) }
- elsif ( $script eq "get_genome_phastcons" ) { script_get_genome_phastcons( $in, $out, $options ) }
- elsif ( $script eq "fold_seq" ) { script_fold_seq( $in, $out, $options ) }
- elsif ( $script eq "split_seq" ) { script_split_seq( $in, $out, $options ) }
- elsif ( $script eq "split_bed" ) { script_split_bed( $in, $out, $options ) }
- elsif ( $script eq "align_seq" ) { script_align_seq( $in, $out, $options ) }
- elsif ( $script eq "tile_seq" ) { script_tile_seq( $in, $out, $options ) }
- elsif ( $script eq "invert_align" ) { script_invert_align( $in, $out, $options ) }
- elsif ( $script eq "patscan_seq" ) { script_patscan_seq( $in, $out, $options ) }
- elsif ( $script eq "create_blast_db" ) { script_create_blast_db( $in, $out, $options ) }
- elsif ( $script eq "blast_seq" ) { script_blast_seq( $in, $out, $options ) }
- elsif ( $script eq "blat_seq" ) { script_blat_seq( $in, $out, $options ) }
- elsif ( $script eq "match_seq" ) { script_match_seq( $in, $out, $options ) }
- elsif ( $script eq "create_vmatch_index" ) { script_create_vmatch_index( $in, $out, $options ) }
- elsif ( $script eq "vmatch_seq" ) { script_vmatch_seq( $in, $out, $options ) }
- elsif ( $script eq "write_fasta" ) { script_write_fasta( $in, $out, $options, $options ) }
- elsif ( $script eq "write_align" ) { script_write_align( $in, $out, $options ) }
- elsif ( $script eq "write_blast" ) { script_write_blast( $in, $out, $options ) }
- elsif ( $script eq "write_tab" ) { script_write_tab( $in, $out, $options ) }
- elsif ( $script eq "write_bed" ) { script_write_bed( $in, $out, $options ) }
- elsif ( $script eq "write_psl" ) { script_write_psl( $in, $out, $options ) }
- elsif ( $script eq "write_2bit" ) { script_write_2bit( $in, $out, $options, $options ) }
- elsif ( $script eq "write_solid" ) { script_write_solid( $in, $out, $options, $options ) }
- elsif ( $script eq "head_records" ) { script_head_records( $in, $out, $options ) }
- elsif ( $script eq "remove_keys" ) { script_remove_keys( $in, $out, $options ) }
- elsif ( $script eq "rename_keys" ) { script_rename_keys( $in, $out, $options ) }
- elsif ( $script eq "uniq_vals" ) { script_uniq_vals( $in, $out, $options ) }
- elsif ( $script eq "merge_vals" ) { script_merge_vals( $in, $out, $options ) }
- elsif ( $script eq "grab" ) { script_grab( $in, $out, $options ) }
- elsif ( $script eq "compute" ) { script_compute( $in, $out, $options ) }
- elsif ( $script eq "flip_tab" ) { script_flip_tab( $in, $out, $options ) }
- elsif ( $script eq "add_ident" ) { script_add_ident( $in, $out, $options ) }
- elsif ( $script eq "count_records" ) { script_count_records( $in, $out, $options ) }
- elsif ( $script eq "random_records" ) { script_random_records( $in, $out, $options ) }
- elsif ( $script eq "sort_records" ) { script_sort_records( $in, $out, $options ) }
- elsif ( $script eq "count_vals" ) { script_count_vals( $in, $out, $options ) }
- elsif ( $script eq "plot_histogram" ) { script_plot_histogram( $in, $out, $options ) }
- elsif ( $script eq "plot_lendist" ) { script_plot_lendist( $in, $out, $options ) }
- elsif ( $script eq "plot_chrdist" ) { script_plot_chrdist( $in, $out, $options ) }
- elsif ( $script eq "plot_karyogram" ) { script_plot_karyogram( $in, $out, $options ) }
- elsif ( $script eq "plot_matches" ) { script_plot_matches( $in, $out, $options ) }
- elsif ( $script eq "plot_seqlogo" ) { script_plot_seqlogo( $in, $out, $options ) }
- elsif ( $script eq "plot_phastcons_profiles" ) { script_plot_phastcons_profiles( $in, $out, $options ) }
- elsif ( $script eq "analyze_bed" ) { script_analyze_bed( $in, $out, $options ) }
- elsif ( $script eq "analyze_vals" ) { script_analyze_vals( $in, $out, $options ) }
- elsif ( $script eq "length_vals" ) { script_length_vals( $in, $out, $options ) }
- elsif ( $script eq "sum_vals" ) { script_sum_vals( $in, $out, $options ) }
- elsif ( $script eq "mean_vals" ) { script_mean_vals( $in, $out, $options ) }
- elsif ( $script eq "median_vals" ) { script_median_vals( $in, $out, $options ) }
- elsif ( $script eq "max_vals" ) { script_max_vals( $in, $out, $options ) }
- elsif ( $script eq "min_vals" ) { script_min_vals( $in, $out, $options ) }
- elsif ( $script eq "upload_to_ucsc" ) { script_upload_to_ucsc( $in, $out, $options ) }
+ $options->{ "SCRIPT" } = $script;
+
+ if ( $script ne "list_biopieces" and $script ne "list_genomes" ) {
+ $script = "print_usage" if ( -t STDIN and keys %{ $options } <= 1 or $options->{ 'help' } );
+ }
+
+ $in = read_stream( $options->{ "stream_in" } );
+ $out = write_stream( $options->{ "stream_out" } );
+
+ if ( $script eq "print_usage" ) { script_print_usage( $in, $out, $options ) }
+ elsif ( $script eq "list_biopieces" ) { script_list_biopieces( $in, $out, $options ) }
+ elsif ( $script eq "list_genomes" ) { script_list_genomes( $in, $out, $options ) }
+ elsif ( $script eq "read_fasta" ) { script_read_fasta( $in, $out, $options ) }
+ elsif ( $script eq "read_tab" ) { script_read_tab( $in, $out, $options ) }
+ elsif ( $script eq "read_psl" ) { script_read_psl( $in, $out, $options ) }
+ elsif ( $script eq "read_bed" ) { script_read_bed( $in, $out, $options ) }
+ elsif ( $script eq "read_fixedstep" ) { script_read_fixedstep( $in, $out, $options ) }
+ elsif ( $script eq "read_blast_tab" ) { script_read_blast_tab( $in, $out, $options ) }
+ elsif ( $script eq "read_embl" ) { script_read_embl( $in, $out, $options ) }
+ elsif ( $script eq "read_stockholm" ) { script_read_stockholm( $in, $out, $options ) }
+ elsif ( $script eq "read_phastcons" ) { script_read_phastcons( $in, $out, $options ) }
+ elsif ( $script eq "read_soft" ) { script_read_soft( $in, $out, $options ) }
+ elsif ( $script eq "read_gff" ) { script_read_gff( $in, $out, $options ) }
+ elsif ( $script eq "read_2bit" ) { script_read_2bit( $in, $out, $options ) }
+ elsif ( $script eq "read_solexa" ) { script_read_solexa( $in, $out, $options ) }
+ elsif ( $script eq "read_solid" ) { script_read_solid( $in, $out, $options ) }
+ elsif ( $script eq "read_mysql" ) { script_read_mysql( $in, $out, $options ) }
+ elsif ( $script eq "format_genome" ) { script_format_genome( $in, $out, $options ) }
+ elsif ( $script eq "length_seq" ) { script_length_seq( $in, $out, $options ) }
+ elsif ( $script eq "uppercase_seq" ) { script_uppercase_seq( $in, $out, $options ) }
+ elsif ( $script eq "shuffle_seq" ) { script_shuffle_seq( $in, $out, $options ) }
+ elsif ( $script eq "analyze_seq" ) { script_analyze_seq( $in, $out, $options ) }
+ elsif ( $script eq "analyze_tags" ) { script_analyze_tags( $in, $out, $options ) }
+ elsif ( $script eq "complexity_seq" ) { script_complexity_seq( $in, $out, $options ) }
+ elsif ( $script eq "oligo_freq" ) { script_oligo_freq( $in, $out, $options ) }
+ elsif ( $script eq "create_weight_matrix" ) { script_create_weight_matrix( $in, $out, $options ) }
+ elsif ( $script eq "calc_bit_scores" ) { script_calc_bit_scores( $in, $out, $options ) }
+ elsif ( $script eq "reverse_seq" ) { script_reverse_seq( $in, $out, $options ) }
+ elsif ( $script eq "complement_seq" ) { script_complement_seq( $in, $out, $options ) }
+ elsif ( $script eq "remove_indels" ) { script_remove_indels( $in, $out, $options ) }
+ elsif ( $script eq "transliterate_seq" ) { script_transliterate_seq( $in, $out, $options ) }
+ elsif ( $script eq "transliterate_vals" ) { script_transliterate_vals( $in, $out, $options ) }
+ elsif ( $script eq "translate_seq" ) { script_translate_seq( $in, $out, $options ) }
+ elsif ( $script eq "extract_seq" ) { script_extract_seq( $in, $out, $options ) }
+ elsif ( $script eq "get_genome_seq" ) { script_get_genome_seq( $in, $out, $options ) }
+ elsif ( $script eq "get_genome_align" ) { script_get_genome_align( $in, $out, $options ) }
+ elsif ( $script eq "get_genome_phastcons" ) { script_get_genome_phastcons( $in, $out, $options ) }
+ elsif ( $script eq "fold_seq" ) { script_fold_seq( $in, $out, $options ) }
+ elsif ( $script eq "split_seq" ) { script_split_seq( $in, $out, $options ) }
+ elsif ( $script eq "split_bed" ) { script_split_bed( $in, $out, $options ) }
+ elsif ( $script eq "align_seq" ) { script_align_seq( $in, $out, $options ) }
+ elsif ( $script eq "tile_seq" ) { script_tile_seq( $in, $out, $options ) }
+ elsif ( $script eq "invert_align" ) { script_invert_align( $in, $out, $options ) }
+ elsif ( $script eq "patscan_seq" ) { script_patscan_seq( $in, $out, $options ) }
+ elsif ( $script eq "create_blast_db" ) { script_create_blast_db( $in, $out, $options ) }
+ elsif ( $script eq "blast_seq" ) { script_blast_seq( $in, $out, $options ) }
+ elsif ( $script eq "blat_seq" ) { script_blat_seq( $in, $out, $options ) }
+ elsif ( $script eq "soap_seq" ) { script_soap_seq( $in, $out, $options ) }
+ elsif ( $script eq "match_seq" ) { script_match_seq( $in, $out, $options ) }
+ elsif ( $script eq "create_vmatch_index" ) { script_create_vmatch_index( $in, $out, $options ) }
+ elsif ( $script eq "vmatch_seq" ) { script_vmatch_seq( $in, $out, $options ) }
+ elsif ( $script eq "write_fasta" ) { script_write_fasta( $in, $out, $options ) }
+ elsif ( $script eq "write_align" ) { script_write_align( $in, $out, $options ) }
+ elsif ( $script eq "write_blast" ) { script_write_blast( $in, $out, $options ) }
+ elsif ( $script eq "write_tab" ) { script_write_tab( $in, $out, $options ) }
+ elsif ( $script eq "write_bed" ) { script_write_bed( $in, $out, $options ) }
+ elsif ( $script eq "write_psl" ) { script_write_psl( $in, $out, $options ) }
+ elsif ( $script eq "write_fixedstep" ) { script_write_fixedstep( $in, $out, $options ) }
+ elsif ( $script eq "write_2bit" ) { script_write_2bit( $in, $out, $options ) }
+ elsif ( $script eq "write_solid" ) { script_write_solid( $in, $out, $options ) }
+ elsif ( $script eq "head_records" ) { script_head_records( $in, $out, $options ) }
+ elsif ( $script eq "remove_keys" ) { script_remove_keys( $in, $out, $options ) }
+ elsif ( $script eq "rename_keys" ) { script_rename_keys( $in, $out, $options ) }
+ elsif ( $script eq "uniq_vals" ) { script_uniq_vals( $in, $out, $options ) }
+ elsif ( $script eq "merge_vals" ) { script_merge_vals( $in, $out, $options ) }
+ elsif ( $script eq "grab" ) { script_grab( $in, $out, $options ) }
+ elsif ( $script eq "compute" ) { script_compute( $in, $out, $options ) }
+ elsif ( $script eq "flip_tab" ) { script_flip_tab( $in, $out, $options ) }
+ elsif ( $script eq "add_ident" ) { script_add_ident( $in, $out, $options ) }
+ elsif ( $script eq "count_records" ) { script_count_records( $in, $out, $options ) }
+ elsif ( $script eq "random_records" ) { script_random_records( $in, $out, $options ) }
+ elsif ( $script eq "sort_records" ) { script_sort_records( $in, $out, $options ) }
+ elsif ( $script eq "count_vals" ) { script_count_vals( $in, $out, $options ) }
+ elsif ( $script eq "plot_histogram" ) { script_plot_histogram( $in, $out, $options ) }
+ elsif ( $script eq "plot_lendist" ) { script_plot_lendist( $in, $out, $options ) }
+ elsif ( $script eq "plot_chrdist" ) { script_plot_chrdist( $in, $out, $options ) }
+ elsif ( $script eq "plot_karyogram" ) { script_plot_karyogram( $in, $out, $options ) }
+ elsif ( $script eq "plot_matches" ) { script_plot_matches( $in, $out, $options ) }
+ elsif ( $script eq "plot_seqlogo" ) { script_plot_seqlogo( $in, $out, $options ) }
+ elsif ( $script eq "plot_phastcons_profiles" ) { script_plot_phastcons_profiles( $in, $out, $options ) }
+ elsif ( $script eq "analyze_bed" ) { script_analyze_bed( $in, $out, $options ) }
+ elsif ( $script eq "analyze_vals" ) { script_analyze_vals( $in, $out, $options ) }
+ elsif ( $script eq "length_vals" ) { script_length_vals( $in, $out, $options ) }
+ elsif ( $script eq "sum_vals" ) { script_sum_vals( $in, $out, $options ) }
+ elsif ( $script eq "mean_vals" ) { script_mean_vals( $in, $out, $options ) }
+ elsif ( $script eq "median_vals" ) { script_median_vals( $in, $out, $options ) }
+ elsif ( $script eq "max_vals" ) { script_max_vals( $in, $out, $options ) }
+ elsif ( $script eq "min_vals" ) { script_min_vals( $in, $out, $options ) }
+ elsif ( $script eq "upload_to_ucsc" ) { script_upload_to_ucsc( $in, $out, $options ) }
close $in if defined $in;
close $out;
-
- # unset status - missing
- # write log file - missing
}
num|n=s
);
}
+ elsif ( $script eq "read_fixedstep" )
+ {
+ @options = qw(
+ data_in|i=s
+ num|n=s
+ );
+ }
elsif ( $script eq "read_blast_tab" )
{
@options = qw(
password|p=s
);
}
+ elsif ( $script eq "format_genome" )
+ {
+ @options = qw(
+ no_stream|x
+ dir|d=s
+ genome|g=s
+ formats|f=s
+ );
+ }
elsif ( $script eq "length_seq" )
{
@options = qw(
ooc|c
);
}
+ elsif ( $script eq "soap_seq" )
+ {
+ @options = qw(
+ in_file|i=s
+ genome|g=s
+ seed_size|s=s
+ mismatches|m=s
+ gap_size|G=s
+ cpus|c=s
+ );
+ }
elsif ( $script eq "match_seq" )
{
@options = qw(
compress|Z
);
}
+ elsif ( $script eq "write_fixedstep" )
+ {
+ @options = qw(
+ no_stream|x
+ data_out|o=s
+ compress|Z
+ );
+ }
elsif ( $script eq "write_2bit" )
{
@options = qw(
);
# print STDERR Dumper( \@options );
-
+
GetOptions(
\%options,
@options,
);
- $options{ "script" } = $script;
-
# print STDERR Dumper( \%options );
- if ( scalar( keys %options ) == 1 or $options{ "help" } ) {
+ if ( -t STDIN && scalar( keys %options ) == 0 or $options{ "help" } ) {
return wantarray ? %options : \%options;
}
$options{ "quals" } = [ split ",", $options{ "quals" } ] if defined $options{ "quals" };
$options{ "feats" } = [ split ",", $options{ "feats" } ] if defined $options{ "feats" };
$options{ "frames" } = [ split ",", $options{ "frames" } ] if defined $options{ "frames" };
+ $options{ "formats" } = [ split ",", $options{ "formats" } ] if defined $options{ "formats" };
# ---- check arguments ----
{
Maasha::Common::error( qq(Argument to --$opt must be "+" or "-" - not "$options{ $opt }") );
}
- elsif ( $opt eq "genome" )
+ elsif ( $opt eq "genome" and $script ne "format_genome" )
{
- @genomes = Maasha::Config::genomes();
-
- if ( not grep $options{ $opt }, @genomes ) {
- Maasha::Common::error( qq(Genome $options{ $opt } not found in "$ENV{ 'INST_DIR' }/conf/genomes.conf") );
+ @genomes = Maasha::Common::ls_dirs( "$ENV{ 'BP_DATA' }/genomes" );
+ map { $_ =~ s/.*\/(.+)$/$1/ } @genomes;
+
+ if ( not grep { $_ =~ /^$options{ $opt }$/ } @genomes ) {
+ Maasha::Common::error( qq(Genome $options{ $opt } not found in "$ENV{ 'BP_DATA' }/genomes/") );
}
}
elsif ( $opt eq "terminal" and not $options{ $opt } =~ /^(svg|post|dumb)/ )
}
Maasha::Common::error( qq(no --database specified) ) if $script eq "create_blast_db" and not $options{ "database" };
- Maasha::Common::error( qq(no --index_name specified) ) if $script eq "create_vmatch_index" and not $options{ "index_name" };
+ Maasha::Common::error( qq(no --index_name specified) ) if $script =~ /create_vmatch_index/ and not $options{ "index_name" };
Maasha::Common::error( qq(no --database or --genome specified) ) if $script eq "blast_seq" and not $options{ "genome" } and not $options{ "database" };
Maasha::Common::error( qq(both --database and --genome specified) ) if $script eq "blast_seq" and $options{ "genome" } and $options{ "database" };
Maasha::Common::error( qq(no --index_name or --genome specified) ) if $script eq "vmatch_seq" and not $options{ "genome" } and not $options{ "index_name" };
- Maasha::Common::error( qq(both --index and --genome specified) ) if $script eq "vmatch_seq" and $options{ "genome" } and $options{ "index" };
- Maasha::Common::error( qq(no --genome specified) ) if $script =~ /get_genome_seq|get_genome_align|get_genome_phastcons|blat_seq|plot_phastcons_profiles|plot_karyogram/ and not $options{ "genome" };
+ Maasha::Common::error( qq(both --index and --genome specified) ) if $script eq "vmatch_seq" and $options{ "genome" } and $options{ "index_name" };
+ Maasha::Common::error( qq(no --in_file or --genome specified) ) if $script eq "soap_seq" and not $options{ "genome" } and not $options{ "in_file" };
+ Maasha::Common::error( qq(both --in_file and --genome specified) ) if $script eq "soap_seq" and $options{ "genome" } and $options{ "in_file" };
+ Maasha::Common::error( qq(no --genome specified) ) if $script =~ /format_genome|get_genome_seq|get_genome_align|get_genome_phastcons|blat_seq|plot_phastcons_profiles|plot_karyogram/ and not $options{ "genome" };
Maasha::Common::error( qq(no --key specified) ) if $script =~ /plot_lendist|plot_histogram/ and not $options{ "key" };
Maasha::Common::error( qq(no --keys speficied) ) if $script =~ /sort_records|count_vals|sum_vals|mean_vals|median_vals|length_vals/ and not $options{ "keys" };
if ( $options->{ 'data_in' } ) {
$file = $options->{ 'data_in' };
} else {
- $file = join "", $ENV{ 'BP_DIR' }, "/bp_usage/", $options->{ 'script' }, ".wiki";
+ $file = join "", $ENV{ 'BP_DIR' }, "/bp_usage/", $options->{ 'SCRIPT' }, ".wiki";
}
$wiki = Maasha::Gwiki::gwiki_read( $file );
@{ $wiki } = grep { $_->[ 0 ]->{ 'FORMAT' } =~ /paragraph/ } @{ $wiki };
$synopsis = $wiki->[ 0 ]->[ 0 ]->{ 'TEXT' };
+ $synopsis =~ s/!(\w)/$1/g;
printf( "%-30s%s\n", $program, $synopsis );
}
}
+sub script_list_genomes
+{
+    # Martin A. Hansen, January 2008.
+
+    # Prints a tab separated table of the genomes found in the directory
+    # $BP_DATA/genomes: first a header row, then one row per genome with a
+    # yes/no column for every format subdirectory seen under any genome.
+    # The table is written with plain print (the currently selected output
+    # handle) and not to the $out stream; $in, $out and $options are unused.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( @genomes, $genome, @formats, $format, %hash, %found, @row );
+
+    @genomes = Maasha::Common::ls_dirs( "$ENV{ 'BP_DATA' }/genomes" );
+
+    foreach $genome ( @genomes )
+    {
+        next if $genome =~ /\.$/;    # skip entries ending in a dot - presumably "." and ".."
+
+        @formats = Maasha::Common::ls_dirs( $genome );
+
+        foreach $format ( @formats )
+        {
+            # The last two path elements are taken as genome name and format name.
+
+            if ( $format =~ /\/([^\/]+)\/(\w+)$/ )
+            {
+                $hash{ $1 }{ $2 } = 1;
+                $found{ $2 }      = 1;
+            }
+        }
+    }
+
+    # Header row followed by one row per genome; columns are the sorted formats.
+
+    @formats = sort keys %found;
+
+    print join( "\t", "Genome", @formats ), "\n";
+
+    foreach $genome ( sort keys %hash )
+    {
+        @row = ( $genome, map { exists $hash{ $genome }{ $_ } ? "yes" : "no" } @formats );
+
+        print join( "\t", @row ), "\n";
+    }
+}
+
+
sub script_read_fasta
{
# Martin A. Hansen, August 2007.
}
+sub script_read_fixedstep
+{
+    # Martin A. Hansen, Juli 2008.
+
+    # Read fixedStep wiggle format from stream or file.
+    #
+    # Records already in the stream are passed through first. Then each file
+    # in $options->{ "files" } is read, and every entry with a parsable header
+    # is emitted as a fresh record with the keys CHR, CHR_BEG, STEP and VALS
+    # (the remaining lines of the entry joined with commas). Entries whose
+    # header does not parse are skipped and not counted. Reading stops when
+    # $options->{ "num" } records have been emitted, if that option is given.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $file, $record, $entry, $head, $chr, $chr_beg, $step, $data_in, $num );
+
+    while ( $record = get_record( $in ) ) {
+        put_record( $record, $out );
+    }
+
+    $num = 1;
+
+    FILE:
+    foreach $file ( @{ $options->{ "files" } } )
+    {
+        $data_in = Maasha::Common::read_open( $file );
+
+        while ( $entry = Maasha::UCSC::fixedstep_get_entry( $data_in ) )
+        {
+            $head = shift @{ $entry };
+
+            # NOTE(review): the header is matched without a leading "fixedStep "
+            # keyword - confirm that fixedstep_get_entry strips it.
+
+            if ( ( $chr, $chr_beg, $step ) = $head =~ /^chrom=([^ ]+) start=(\d+) step=(\d+)$/ )
+            {
+                # A new hash per entry, so nothing leaks from the previous record.
+
+                $record = {
+                    "CHR"     => $chr,
+                    "CHR_BEG" => $chr_beg,
+                    "STEP"    => $step,
+                    "VALS"    => join( ",", @{ $entry } ),
+                };
+
+                put_record( $record, $out );
+
+                if ( $options->{ "num" } and $num == $options->{ "num" } )
+                {
+                    close $data_in;
+
+                    last FILE;
+                }
+
+                $num++;
+            }
+        }
+
+        close $data_in;
+    }
+}
+
+
sub script_read_blast_tab
{
# Martin A. Hansen, September 2007.
{
$data_in = Maasha::Common::read_open( $file );
- while ( $entry = Maasha::UCSC::phastcons_get_entry( $data_in ) )
+ while ( $entry = Maasha::UCSC::fixedstep_get_entry( $data_in ) )
{
@records = Maasha::UCSC::phastcons_parse_entry( $entry, $options );
}
+sub script_format_genome
+{
+    # Martin A. Hansen, Juli 2008.
+
+    # Format a genome to specified formats.
+    #
+    # When fasta, blast or vmatch is among the requested formats, sequence
+    # records from the stream are written to
+    # <dir>/genomes/<genome>/fasta/<genome>.fna - unless that file already
+    # exists, in which case it is reused. When phastcons is requested,
+    # records with CHR, CHR_BEG, STEP and VALS are written as fixedStep
+    # entries to <dir>/genomes/<genome>/phastcons/<genome>.pp. The two
+    # outputs use separate file handles, so both kinds of formats can be
+    # requested in one run. Both files are closed before the indexes are
+    # built, so the indexers never see unflushed data.
+    #
+    # <dir> is $options->{ 'dir' } or, failing that, $BP_DATA.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $dir, $genome, @formats, $fasta_dir, $phastcons_dir, $vals, $fh_fasta, $fh_phastcons, $record, $format, $entry );
+
+    $dir     = $options->{ 'dir' } || $ENV{ 'BP_DATA' };
+    $genome  = $options->{ 'genome' };
+    @formats = @{ $options->{ 'formats' } || [] };
+
+    Maasha::Common::error( "Directory: $dir does not exist" ) if not -d $dir;
+    Maasha::Common::dir_create_if_not_exists( "$dir/genomes" );
+    Maasha::Common::dir_create_if_not_exists( "$dir/genomes/$genome" );
+
+    if ( grep { $_ =~ /fasta|blast|vmatch/i } @formats )
+    {
+        $fasta_dir = "$dir/genomes/$genome/fasta";
+
+        # An existing genome FASTA file is kept as is and only (re)indexed.
+
+        if ( not -f "$fasta_dir/$genome.fna" )
+        {
+            Maasha::Common::dir_create_if_not_exists( $fasta_dir );
+
+            $fh_fasta = Maasha::Common::write_open( "$fasta_dir/$genome.fna" );
+        }
+    }
+
+    if ( grep { $_ =~ /phastcons/i } @formats )
+    {
+        $phastcons_dir = "$dir/genomes/$genome/phastcons";
+
+        Maasha::Common::dir_create_if_not_exists( $phastcons_dir );
+
+        $fh_phastcons = Maasha::Common::write_open( "$phastcons_dir/$genome.pp" );
+    }
+
+    while ( $record = get_record( $in ) )
+    {
+        if ( $fh_fasta and $entry = record2fasta( $record ) )
+        {
+            Maasha::Fasta::put_entry( $entry, $fh_fasta, $options->{ "wrap" } );
+        }
+        elsif ( $fh_phastcons and $record->{ "CHR" } and $record->{ "CHR_BEG" } and $record->{ "STEP" } and $record->{ "VALS" } )
+        {
+            print $fh_phastcons "fixedStep chrom=$record->{ 'CHR' } start=$record->{ 'CHR_BEG' } step=$record->{ 'STEP' }\n";
+
+            $vals = $record->{ 'VALS' };
+
+            $vals =~ tr/,/\n/;    # one value per line
+
+            print $fh_phastcons "$vals\n";
+        }
+
+        put_record( $record, $out ) if not $options->{ "no_stream" };
+    }
+
+    # Close - and thereby flush - the output files before the indexers read them.
+
+    close $fh_fasta     if $fh_fasta;
+    close $fh_phastcons if $fh_phastcons;
+
+    foreach $format ( @formats )
+    {
+        if    ( $format =~ /^fasta$/i )     { Maasha::Fasta::fasta_index( "$fasta_dir/$genome.fna", "$dir/genomes/$genome/fasta/$genome.index" ) }
+        elsif ( $format =~ /^blast$/i )     { Maasha::NCBI::blast_index( "$genome.fna", $fasta_dir, "$dir/genomes/$genome/blast", "dna", $genome ) }
+        elsif ( $format =~ /^blat$/i )      { print STDERR "BLAT FORMAT NOT IMPLEMENTED" }
+        elsif ( $format =~ /^vmatch$/i )    { Maasha::Match::vmatch_index( "$genome.fna", $fasta_dir, "$dir/genomes/$genome/vmatch", $BP_TMP ) }
+        elsif ( $format =~ /^phastcons$/i ) { Maasha::UCSC::phastcons_index( "$genome.pp", $phastcons_dir ) }
+    }
+}
+
+
sub script_length_seq
{
# Martin A. Hansen, August 2007.
%new_record = %{ $record };
$new_record{ "SEQ" } = Maasha::Seq::translate( $record->{ "SEQ" }, $frame );
- $new_record{ "SEQ_LEN" } = length $record->{ "SEQ" };
+ $new_record{ "SEQ_LEN" } = length $new_record{ "SEQ" };
$new_record{ "FRAME" } = $frame;
put_record( \%new_record, $out );
}
}
+ $record->{ "SEQ_LEN" } = length $record->{ "SEQ" };
+
put_record( $record, $out );
}
}
# Returns nothing.
- my ( $record, $genome_file, $index_file, $index, $fh, $index_head, $index_beg, $index_len, $beg, $len, %lookup_hash, @begs, @lens, $i );
+ my ( $record, $genome, $genome_file, $index_file, $index, $fh, $index_head, $index_beg, $index_len, $beg, $len, %lookup_hash, @begs, @lens, $i );
$options->{ "flank" } ||= 0;
if ( $options->{ "genome" } )
{
- $genome_file = Maasha::Config::genome_fasta( $options->{ 'genome' } );
- $index_file = Maasha::Config::genome_fasta_index( $options->{ 'genome' } );
+ $genome = $options->{ "genome" };
+
+ $genome_file = "$ENV{ 'BP_DATA' }/genomes/$genome/fasta/$genome.fna";
+ $index_file = "$ENV{ 'BP_DATA' }/genomes/$genome/fasta/$genome.index";
$fh = Maasha::Common::read_open( $genome_file );
$index = Maasha::Fasta::index_retrieve( $index_file );
if ( $options->{ "uniq" } and not $lookup{ $subseq } )
{
- $new_record->{ "REC_TYPE" } = "SPLIT";
$new_record->{ "SEQ_NAME" } = $record->{ "SEQ_NAME" } . "[" . ( $i + 1 ) . "-" . ( $i + $options->{ "word_size" } ) . "]";
$new_record->{ "SEQ" } = $subseq;
}
else
{
- $new_record->{ "REC_TYPE" } = "SPLIT";
$new_record->{ "SEQ_NAME" } = $record->{ "SEQ_NAME" } . "[" . ( $i + 1 ) . "-" . ( $i + $options->{ "word_size" } ) . "]";
$new_record->{ "SEQ" } = $subseq;
$patterns = Maasha::Patscan::read_patterns( $options->{ "patterns_in" } );
}
- $genome_file = Maasha::Config::genome_fasta( $options->{ 'genome' } ) if $options->{ 'genome' };
+ $genome_file = "$ENV{ 'BP_DATA' }/genomes/$options->{ 'genome' }/fasta/$options->{ 'genome' }.fna" if $options->{ 'genome' };
push @args, "-c" if $options->{ "comp" };
push @args, "-m $options->{ 'max_hits' }" if $options->{ 'max_hits' };
$i++;
}
-
-# put_record( $record, $out );
}
close $fh_out;
# Returns nothing.
- my ( $fh, $seq_type, $path, $record );
+ my ( $fh, $seq_type, $path, $record, $entry );
$path = $options->{ "database" };
{
put_record( $record, $out ) if not $options->{ "no_stream" };
- if ( $record->{ "SEQ" } and $record->{ "SEQ_NAME" } )
+ if ( $entry = record2fasta( $record ) )
{
- $seq_type = Maasha::Seq::seq_guess_type( $record->{ "SEQ" } ) if not $seq_type;
+ $seq_type = Maasha::Seq::seq_guess_type( $entry->[ SEQ ] ) if not $seq_type;
- Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh );
+ Maasha::Fasta::put_entry( $entry, $fh );
}
}
# Returns nothing.
- my ( $genome, $q_type, $s_type, $tmp_in, $tmp_out, $fh_in, $fh_out, $record, $line, @fields );
+ my ( $genome, $q_type, $s_type, $tmp_in, $tmp_out, $fh_in, $fh_out, $record, $line, @fields, $entry );
$options->{ "e_val" } = 10 if not defined $options->{ "e_val" };
$options->{ "filter" } = "F";
$options->{ "filter" } = "T" if $options->{ "filter" };
$options->{ "cpus" } ||= 1;
- $options->{ "database" } = Maasha::Config::genome_blast( $options->{ 'genome' } ) if $options->{ 'genome' };
+ $options->{ "database" } = "$ENV{ 'BP_DATA' }/genomes/$options->{ 'genome' }/blast/$options->{ 'genome' }.fna" if $options->{ 'genome' };
$tmp_in = "$BP_TMP/blast_query.seq";
$tmp_out = "$BP_TMP/blast.result";
while ( $record = get_record( $in ) )
{
- if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } )
+ if ( $entry = record2fasta( $record ) )
{
- $q_type = Maasha::Seq::seq_guess_type( $record->{ "SEQ" } ) if not $q_type;
+ $q_type = Maasha::Seq::seq_guess_type( $entry->[ SEQ ] ) if not $q_type;
- Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh_out );
+ Maasha::Fasta::put_entry( $entry, $fh_out );
}
put_record( $record, $out );
# Returns nothing.
- my ( $blat_args, $genome_file, $query_file, $fh_in, $fh_out, $type, $record, $result_file, $entries );
+ my ( $blat_args, $genome_file, $query_file, $fh_in, $fh_out, $type, $record, $result_file, $entries, $entry );
- $genome_file = Maasha::Config::genome_fasta( $options->{ "genome" } );
+ $genome_file = "$ENV{ 'BP_DATA' }/genomes/$options->{ 'genome' }/fasta/$options->{ 'genome' }.fna";
$options->{ 'tile_size' } ||= 11;
$options->{ 'one_off' } ||= 0;
$blat_args .= " -minIdentity=$options->{ 'min_identity' }";
$blat_args .= " -minScore=$options->{ 'min_score' }";
$blat_args .= " -stepSize=$options->{ 'step_size' }";
- $blat_args .= " -ooc=" . Maasha::Config::genome_blat_ooc( $options->{ "genome" }, 11 ) if $options->{ 'ooc' };
+# $blat_args .= " -ooc=" . Maasha::Config::genome_blat_ooc( $options->{ "genome" }, 11 ) if $options->{ 'ooc' };
$query_file = "$BP_TMP/blat.seq";
while ( $record = get_record( $in ) )
{
- if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } )
+ if ( $entry = record2fasta( $record ) )
{
- Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh_out, 80 );
- $type = Maasha::Seq::seq_guess_type( $record->{ "SEQ" } ) if not $type;
+ $type = Maasha::Seq::seq_guess_type( $entry->[ SEQ ] ) if not $type;
+ Maasha::Fasta::put_entry( $entry, $fh_out, 80 );
}
put_record( $record, $out );
}
+sub script_soap_seq
+{
+    # Martin A. Hansen, July 2008.
+
+    # soap sequences in stream against a given file or genome.
+    #
+    # All records are passed through to the out stream; those that
+    # record2fasta can convert are also written to a temporary query file.
+    # soap is then run against $options->{ "in_file" } (set to the genome
+    # FASTA file under $BP_DATA when --genome is given), and every line of
+    # the tab separated result is emitted as a record with REC_TYPE "SOAP".
+    # Both temporary files are removed afterwards.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $tmp_in, $tmp_out, $fh_in, $fh_out, $record, $line, @fields, $entry );
+
+    # mismatches and gap_size are tested with defined, because 0 is a
+    # meaningful value here - ||= would turn --mismatches 0 into 2.
+
+    $options->{ "seed_size" }  ||= 10;
+    $options->{ "mismatches" }   = 2 if not defined $options->{ "mismatches" };
+    $options->{ "gap_size" }     = 0 if not defined $options->{ "gap_size" };
+    $options->{ "cpus" }       ||= 1;
+
+    $options->{ "in_file" } = "$ENV{ 'BP_DATA' }/genomes/$options->{ 'genome' }/fasta/$options->{ 'genome' }.fna" if $options->{ 'genome' };
+
+    $tmp_in  = "$BP_TMP/soap_query.seq";
+    $tmp_out = "$BP_TMP/soap.result";
+
+    $fh_out = Maasha::Common::write_open( $tmp_in );
+
+    while ( $record = get_record( $in ) )
+    {
+        if ( $entry = record2fasta( $record ) ) {
+            Maasha::Fasta::put_entry( $entry, $fh_out );
+        }
+
+        put_record( $record, $out );
+    }
+
+    close $fh_out;
+
+    # NOTE(review): option values are interpolated into the command string
+    # unescaped - confirm how Maasha::Common::run executes it.
+
+    Maasha::Common::run( "soap", "-s $options->{ 'seed_size' } -r 2 -a $tmp_in -v $options->{ 'mismatches' } -g $options->{ 'gap_size' } -p $options->{ 'cpus' } -d $options->{ 'in_file' } -o $tmp_out > /dev/null 2>&1", 1 );
+
+    unlink $tmp_in;
+
+    $fh_in = Maasha::Common::read_open( $tmp_out );
+
+    while ( $line = <$fh_in> )
+    {
+        chomp $line;
+
+        @fields = split /\t/, $line;
+
+        # A new hash for every hit; column 6 is presumably the read length.
+
+        $record = {
+            "REC_TYPE" => "SOAP",
+            "Q_ID"     => $fields[ 0 ],
+            "SCORE"    => $fields[ 3 ],
+            "STRAND"   => $fields[ 6 ],
+            "S_ID"     => $fields[ 7 ],
+            "S_BEG"    => $fields[ 8 ] - 1,                   # soap is 1-based
+            "S_END"    => $fields[ 8 ] + $fields[ 5 ] - 2,
+        };
+
+        put_record( $record, $out );
+    }
+
+    close $fh_in;
+
+    unlink $tmp_out;
+}
+
+
sub script_match_seq
{
# Martin A. Hansen, August 2007.
# Returns nothing.
- my ( $record, $file_tmp, $fh_tmp, $type );
+ my ( $record, $file_tmp, $fh_tmp, $type, $entry );
if ( $options->{ "index_name" } )
{
while ( $record = get_record( $in ) )
{
- if ( $options->{ "index_name" } and $record->{ "SEQ_NAME" } and $record->{ "SEQ" } )
+ if ( $options->{ "index_name" } and $entry = record2fasta( $record ) )
{
- Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh_tmp );
+ Maasha::Fasta::put_entry( $entry, $fh_tmp );
- $type = Maasha::Seq::seq_guess_type( $record->{ "SEQ" } ) if not $type;
+ $type = Maasha::Seq::seq_guess_type( $entry->[ SEQ ] ) if not defined $type;
}
put_record( $record, $out ) if not $options->{ "no_stream" };
# Returns nothing.
- my ( @index_files, @records, $result_file, $fh_in, $record );
+ my ( @index_files, @records, $result_file, $fh_in, $record, %hash );
$options->{ 'count' } = 1 if $options->{ 'max_hits' };
- if ( $options->{ "index_name" } ) {
+ if ( $options->{ "index_name" } )
+ {
@index_files = $options->{ "index_name" };
- } else {
- @index_files = Maasha::Config::genome_vmatch( $options->{ "genome" } );
+ }
+ else
+ {
+ @index_files = Maasha::Common::ls_files( "$ENV{ 'BP_DATA' }/genomes/$options->{ 'genome' }/vmatch" );
+
+ map { $_ =~ /^(.+)\.[a-z1]{3,4}$/; $hash{ $1 } = 1 } @index_files;
+
+ @index_files = sort keys %hash;
}
while ( $record = get_record( $in ) )
# Returns nothing.
- my ( $record, $fh );
+ my ( $record, $fh, $entry );
$fh = write_stream( $options->{ "data_out" }, $options->{ "compress" } );
while ( $record = get_record( $in ) )
{
- if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } ) {
- Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh, $options->{ "wrap" } );
+ if ( $entry = record2fasta( $record ) ) {
+ Maasha::Fasta::put_entry( $entry, $fh, $options->{ "wrap" } );
}
put_record( $record, $out ) if not $options->{ "no_stream" };
# Write BED format for the UCSC genome browser using records in stream.
- # Crude - needs lots of work!
-
my ( $in, # handle to in stream
$out, # handle to out stream
$options, # options hash
Maasha::UCSC::bed_put_entry( $new_record, $fh, 6 );
}
+ elsif ( $record->{ "REC_TYPE" } eq "SOAP" and $record->{ "S_ID" } =~ /^chr/i ) # ---- Hits from Vmatch ----
+ {
+ $new_record->{ "CHR" } = $record->{ "S_ID" };
+ $new_record->{ "CHR_BEG" } = $record->{ "S_BEG" };
+ $new_record->{ "CHR_END" } = $record->{ "S_END" };
+ $new_record->{ "Q_ID" } = $record->{ "Q_ID" };
+ $new_record->{ "SCORE" } = $record->{ "SCORE" } || 999;
+ $new_record->{ "STRAND" } = $record->{ "STRAND" };
+
+ Maasha::UCSC::bed_put_entry( $new_record, $fh, 6 );
+ }
elsif ( $record->{ "CHR" } and defined $record->{ "CHR_BEG" } and $record->{ "CHR_END" } ) # ---- Generic data from tables ----
{
Maasha::UCSC::bed_put_entry( $record, $fh );
}
+sub script_write_fixedstep
+{
+    # Martin A. Hansen, July 2008.
+
+    # Writes a fixedStep entry for every record in the stream that holds
+    # true values for CHR, CHR_BEG, STEP and VALS. The commas in VALS are
+    # replaced with newlines so each value ends up on a line of its own.
+    # Every record is passed on to the out stream unless the no_stream
+    # option is set.
+
+    # NOTE(review): the keys are tested for truth, so a record with a
+    # CHR_BEG of 0 is not written - confirm whether CHR_BEG is 0- or
+    # 1-based at this point.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $fh_data, $record, @missing, $vals );
+
+    $fh_data = write_stream( $options->{ "data_out" }, $options->{ "compress" } );
+
+    while ( $record = get_record( $in ) )
+    {
+        @missing = grep { not $record->{ $_ } } qw( CHR CHR_BEG STEP VALS );
+
+        if ( not @missing )
+        {
+            ( $vals = $record->{ 'VALS' } ) =~ tr/,/\n/;
+
+            print $fh_data "fixedStep chrom=$record->{ 'CHR' } start=$record->{ 'CHR_BEG' } step=$record->{ 'STEP' }\n";
+            print $fh_data "$vals\n";
+        }
+
+        put_record( $record, $out ) if not $options->{ "no_stream" };
+    }
+
+    close $fh_data;
+}
+
+
sub script_write_2bit
{
# Martin A. Hansen, March 2008.
# Returns nothing.
- my ( $record, $mask, $tmp_file, $fh_tmp, $fh_in, $fh_out );
+ my ( $record, $mask, $tmp_file, $fh_tmp, $fh_in, $fh_out, $entry );
$mask = 1 if not $options->{ "no_mask" };
while ( $record = get_record( $in ) )
{
- if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } ) {
- Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh_tmp );
+ if ( $entry = record2fasta( $record ) ) {
+ Maasha::Fasta::put_entry( $entry, $fh_tmp );
}
put_record( $record, $out ) if not $options->{ "no_stream" };
# Returns nothing.
- my ( $record, $fh, $seq_cs );
+ my ( $record, $fh, $entry );
$fh = write_stream( $options->{ "data_out" }, $options->{ "compress" } );
while ( $record = get_record( $in ) )
{
- if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } )
+ if ( $entry = record2fasta( $record ) )
{
- $seq_cs = Maasha::Solid::seq2color_space( $record->{ "SEQ" } );
+ $entry->[ SEQ ] = Maasha::Solid::seq2color_space( uc $entry->[ SEQ ] );
- Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $seq_cs ], $fh, $options->{ "wrap" } );
+ Maasha::Fasta::put_entry( $entry, $fh, $options->{ "wrap" } );
}
put_record( $record, $out ) if not $options->{ "no_stream" };
foreach $key ( keys %{ $analysis } )
{
$analysis->{ $key }->{ "MEAN" } = sprintf "%.2f", $analysis->{ $key }->{ "SUM" } / $analysis->{ $key }->{ "COUNT" };
- $analysis->{ $key }->{ "SUM" } = sprintf "%.2f", $analysis->{ $key }->{ "SUN" };
+ $analysis->{ $key }->{ "SUM" } = sprintf "%.2f", $analysis->{ $key }->{ "SUM" };
}
my ( $keys, $types, $counts, $mins, $maxs, $sums, $means );
$wig_file = "$options->{ 'table' }.wig";
$wib_file = "$options->{ 'table' }.wib";
- $wib_dir = "$ENV{ 'BP_DATA' }/genomes/$options->{ 'database' }/wib";
+ $wib_dir = "$ENV{ 'HOME' }/ucsc/wib";
Maasha::Common::dir_create_if_not_exists( $wib_dir );
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+sub record2fasta
+{
+    # Martin A. Hansen, July 2008.
+
+    # Given a biopiece record converts it to a FASTA entry.
+    # The name is the first usable value of SEQ_NAME, Q_ID and S_ID, and
+    # the sequence is the first usable value of SEQ, Q_SEQ and S_SEQ.
+    # A value is usable when it is defined and not the empty string, so
+    # a name (or sequence) of "0" is kept and not mistaken for missing.
+
+    # NOTE(review): name and sequence are picked independently, so a
+    # record holding only Q_ID and S_SEQ pairs the two - confirm that
+    # this is intended.
+
+    my ( $record,   # record
+       ) = @_;
+
+    # Returns a tuple in list context and an array ref in scalar context.
+    # Returns nothing if no usable name or no usable sequence is found.
+
+    my ( $key, $seq_name, $seq );
+
+    foreach $key ( "SEQ_NAME", "Q_ID", "S_ID" )
+    {
+        next if not defined $record->{ $key } or $record->{ $key } eq "";
+
+        $seq_name = $record->{ $key };
+
+        last;
+    }
+
+    foreach $key ( "SEQ", "Q_SEQ", "S_SEQ" )
+    {
+        next if not defined $record->{ $key } or $record->{ $key } eq "";
+
+        $seq = $record->{ $key };
+
+        last;
+    }
+
+    return if not defined $seq_name or not defined $seq;
+
+    return wantarray ? ( $seq_name, $seq ) : [ $seq_name, $seq ];
+}
+
+
sub read_stream
{
# Martin A. Hansen, July 2007.
# Reads one record at a time and converts that record
# to a Perl data structure (a hash) which is returned.
- my ( $fh,
+ my ( $fh, # handle to stream
) = @_;
- # Returns data structure.
+ # Returns a hash.
my ( $block, @lines, $line, $key, $value, %record );
foreach $line ( @lines )
{
- ( $key, $value ) = split ": ", $line;
+ ( $key, $value ) = split ": ", $line, 2;
$record{ $key } = $value;
}
print STDERR "\nProgram '$script' died->$sig" . " - Please wait for temporary data to be removed\n";
}
- # This is a really bad solution, potentially, anyone can include this module and set
- # the BP_TMP to point at any dir and thus take out the machine !!!
-
- Maasha::Common::dir_remove( $BP_TMP );
+ clean_tmp();
}
exit( 0 );
}
-END
-{
- # This is a really bad solution, potentially, anyone can include this module and set
- # the BP_TMP to point at any dir and thus take out the machine !!!
-
- Maasha::Common::dir_remove( $BP_TMP );
-}
-
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-1;
-
-__END__
-
-
-sub script_read_soft
+sub clean_tmp
{
- # Martin A. Hansen, December 2007.
+ # Martin A. Hansen, July 2008.
- # Read soft format.
- # http://www.ncbi.nlm.nih.gov/geo/info/soft2.html
-
- my ( $in, # handle to in stream
- $out, # handle to out stream
- $options, # options hash
- ) = @_;
+ # Cleans out temporary directories in BP_TMP. A directory is removed
+ # if its name has the form <user>_<digits>_<pid>_bp_tmp, <user> is the
+ # current user, and <pid> is either this process or a process that is
+ # no longer running. Everything else in BP_TMP is left untouched.
+
+ # Raises an error via Maasha::Common::error if BP_TMP is not set.
# Returns nothing.
- my ( $data_in, $file, $num, $records, $record );
+ my ( $tmpdir, @dirs, $curr_pid, $curr_user, $dir, $user, $pid );
- while ( $record = get_record( $in ) ) {
- put_record( $record, $out );
- }
+ $tmpdir = $ENV{ 'BP_TMP' } || Maasha::Common::error( 'No BP_TMP variable in environment.' );
- $num = 1;
+ $curr_pid = Maasha::Common::get_processid();
- foreach $file ( @{ $options->{ "files" } } )
- {
- $records = Maasha::NCBI::soft_parse( $file );
+ @dirs = Maasha::Common::ls_dirs( $tmpdir );
- foreach $record ( @{ $records } )
+ foreach $dir ( @dirs )
+ {
+ # \Q..\E makes the BP_TMP path match literally and not as a regex.
+ if ( $dir =~ /^\Q$tmpdir\E\/(.+)_\d+_(\d+)_bp_tmp$/ )
{
- put_record( $record, $out );
+ $user = $1;
+ $pid = $2;
- goto NUM if $options->{ "num" } and $num == $options->{ "num" };
-
- $num++;
+ # Look up the current user once, and only when a candidate dir is found.
+ $curr_user = Maasha::Common::get_user() if not defined $curr_user;
+
+ # Remove stale dirs (process gone) and the dir of this process.
+ if ( $user eq $curr_user and ( not Maasha::Common::process_running( $pid ) or $pid == $curr_pid ) )
+ {
+ Maasha::Common::dir_remove( $dir );
+ }
}
}
+}
- NUM:
- close $data_in if $data_in;
+END
+{
+ # Run the temporary directory clean up when the program exits.
+ clean_tmp();
}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+1;
+
+__END__