# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> LOG <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-my $log_fh = Maasha::Common::append_open( "$ENV{ 'BP_TMP' }/biopieces.log" );
+my $log_global = Maasha::Common::append_open( "$ENV{ 'BP_LOG' }/biopieces.log" );
+my $log_local = Maasha::Common::append_open( "$ENV{ 'HOME' }/.biopieces.log" );
-$log_fh->autoflush( 1 );
+$log_global->autoflush( 1 );
+$log_local->autoflush( 1 );
-&log( $log_fh, $script, \@ARGV );
+&log( $log_global, $script, \@ARGV );
+&log( $log_local, $script, \@ARGV );
-close $log_fh;
+close $log_global;
+close $log_local;
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> RUN SCRIPT <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
elsif ( $script eq "create_blast_db" ) { script_create_blast_db( $in, $out, $options ) }
elsif ( $script eq "blast_seq" ) { script_blast_seq( $in, $out, $options ) }
elsif ( $script eq "blat_seq" ) { script_blat_seq( $in, $out, $options ) }
+ elsif ( $script eq "soap_seq" ) { script_soap_seq( $in, $out, $options ) }
elsif ( $script eq "match_seq" ) { script_match_seq( $in, $out, $options ) }
elsif ( $script eq "create_vmatch_index" ) { script_create_vmatch_index( $in, $out, $options ) }
- elsif ( $script eq "create_fixedstep_index" ) { script_create_fixedstep_index( $in, $out, $options ) }
elsif ( $script eq "vmatch_seq" ) { script_vmatch_seq( $in, $out, $options ) }
elsif ( $script eq "write_fasta" ) { script_write_fasta( $in, $out, $options ) }
elsif ( $script eq "write_align" ) { script_write_align( $in, $out, $options ) }
ooc|c
);
}
+ elsif ( $script eq "soap_seq" )
+ {
+ @options = qw(
+ in_file|i=s
+ genome|g=s
+ mismatches|m=s
+ gap_size|G=s
+ cpus|c=s
+ );
+ }
elsif ( $script eq "match_seq" )
{
@options = qw(
no_stream|x
);
}
- elsif ( $script eq "create_fixedstep_index" )
- {
- @options = qw(
- index_name|i=s
- no_stream|x
- );
- }
elsif ( $script eq "vmatch_seq" )
{
@options = qw(
}
Maasha::Common::error( qq(no --database specified) ) if $script eq "create_blast_db" and not $options{ "database" };
- Maasha::Common::error( qq(no --index_name specified) ) if $script =~ /create_vmatch_index|create_fixedstep_index/ and not $options{ "index_name" };
+ Maasha::Common::error( qq(no --index_name specified) ) if $script =~ /create_vmatch_index/ and not $options{ "index_name" };
Maasha::Common::error( qq(no --database or --genome specified) ) if $script eq "blast_seq" and not $options{ "genome" } and not $options{ "database" };
Maasha::Common::error( qq(both --database and --genome specified) ) if $script eq "blast_seq" and $options{ "genome" } and $options{ "database" };
Maasha::Common::error( qq(no --index_name or --genome specified) ) if $script eq "vmatch_seq" and not $options{ "genome" } and not $options{ "index_name" };
- Maasha::Common::error( qq(both --index and --genome specified) ) if $script eq "vmatch_seq" and $options{ "genome" } and $options{ "index" };
+ Maasha::Common::error( qq(both --index and --genome specified) ) if $script eq "vmatch_seq" and $options{ "genome" } and $options{ "index_name" };
+ Maasha::Common::error( qq(no --in_file or --genome specified) ) if $script eq "soap_seq" and not $options{ "genome" } and not $options{ "in_file" };
+ Maasha::Common::error( qq(both --in_file and --genome specified) ) if $script eq "soap_seq" and $options{ "genome" } and $options{ "in_file" };
Maasha::Common::error( qq(no --genome specified) ) if $script =~ /format_genome|get_genome_seq|get_genome_align|get_genome_phastcons|blat_seq|plot_phastcons_profiles|plot_karyogram/ and not $options{ "genome" };
Maasha::Common::error( qq(no --key specified) ) if $script =~ /plot_lendist|plot_histogram/ and not $options{ "key" };
Maasha::Common::error( qq(no --keys speficied) ) if $script =~ /sort_records|count_vals|sum_vals|mean_vals|median_vals|length_vals/ and not $options{ "keys" };
# Returns nothing.
- my ( $dir, $genome, $fasta_dir, $fh_out, $record, $format, $index );
+ my ( $dir, $genome, $fasta_dir, $phastcons_dir, $vals, $fh_out, $record, $format, $index );
$dir = $options->{ 'dir' } || $ENV{ 'BP_DATA' };
$genome = $options->{ 'genome' };
Maasha::Common::dir_create_if_not_exists( "$dir/genomes" );
Maasha::Common::dir_create_if_not_exists( "$dir/genomes/$genome" );
- if ( -f "$dir/genomes/$genome/fasta/$genome.fna" )
+ if ( grep { $_ =~ /fasta|blast|vmatch/i } @{ $options->{ "formats" } } )
{
- $fasta_dir = "$dir/genomes/$genome/fasta";
- }
- elsif ( grep { $_ =~ /fasta/i } @{ $options->{ "formats" } } )
- {
- Maasha::Common::dir_create_if_not_exists( "$dir/genomes/$genome/fasta" );
+ if ( -f "$dir/genomes/$genome/fasta/$genome.fna" )
+ {
+ $fasta_dir = "$dir/genomes/$genome/fasta";
+ }
+ else
+ {
+ Maasha::Common::dir_create_if_not_exists( "$dir/genomes/$genome/fasta" );
- $fasta_dir = "$dir/genomes/$genome/fasta";
+ $fasta_dir = "$dir/genomes/$genome/fasta";
- $fh_out = Maasha::Common::write_open( "$fasta_dir/$genome.fna" );
+ $fh_out = Maasha::Common::write_open( "$fasta_dir/$genome.fna" );
+ }
}
- else
+ elsif ( grep { $_ =~ /phastcons/i } @{ $options->{ "formats" } } )
{
- $fasta_dir = $BP_TMP;
+ Maasha::Common::dir_create_if_not_exists( "$dir/genomes/$genome/phastcons" );
+
+ $phastcons_dir = "$dir/genomes/$genome/phastcons";
- $fh_out = Maasha::Common::write_open( "$fasta_dir/$genome.fna" );
+ $fh_out = Maasha::Common::write_open( "$phastcons_dir/$genome.pp" );
}
while ( $record = get_record( $in ) )
{
- if ( $fh_out and $record->{ "SEQ_NAME" } and $record->{ "SEQ" } ) {
+ if ( $fh_out and $record->{ "SEQ_NAME" } and $record->{ "SEQ" } )
+ {
Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh_out, $options->{ "wrap" } );
}
+ elsif ( $fh_out and $record->{ "CHR" } and $record->{ "CHR_BEG" } and $record->{ "STEP" } and $record->{ "VALS" } )
+ {
+ print $fh_out "fixedStep chrom=$record->{ 'CHR' } start=$record->{ 'CHR_BEG' } step=$record->{ 'STEP' }\n";
+
+ $vals = $record->{ 'VALS' };
+
+ $vals =~ tr/,/\n/;
+
+ print $fh_out "$vals\n";
+ }
put_record( $record, $out ) if not $options->{ "no_stream" };
}
foreach $format ( @{ $options->{ 'formats' } } )
{
- if ( $format =~ /^fasta$/i ) { Maasha::Fasta::fasta_index( "$fasta_dir/$genome.fna", "$dir/genomes/$genome/fasta/$genome.index" ) }
- elsif ( $format =~ /^blast$/i ) { Maasha::NCBI::blast_index( "$genome.fna", $fasta_dir, "$dir/genomes/$genome/blast", "dna", $genome ) }
- elsif ( $format =~ /^blat$/i ) { print STDERR "BLAT FORMAT NOT IMPLEMENTED" }
- elsif ( $format =~ /^vmatch$/i ) { Maasha::Match::vmatch_index( "$genome.fna", $fasta_dir, "$dir/genomes/$genome/vmatch", $BP_TMP ) }
+ if ( $format =~ /^fasta$/i ) { Maasha::Fasta::fasta_index( "$fasta_dir/$genome.fna", "$dir/genomes/$genome/fasta/$genome.index" ) }
+ elsif ( $format =~ /^blast$/i ) { Maasha::NCBI::blast_index( "$genome.fna", $fasta_dir, "$dir/genomes/$genome/blast", "dna", $genome ) }
+ elsif ( $format =~ /^blat$/i ) { print STDERR "BLAT FORMAT NOT IMPLEMENTED" }
+ elsif ( $format =~ /^vmatch$/i ) { Maasha::Match::vmatch_index( "$genome.fna", $fasta_dir, "$dir/genomes/$genome/vmatch", $BP_TMP ) }
+ elsif ( $format =~ /^phastcons$/i ) { Maasha::UCSC::phastcons_index( "$genome.pp", $phastcons_dir ) }
}
close $fh_out if $fh_out;
$options->{ "filter" } = "T" if $options->{ "filter" };
$options->{ "cpus" } ||= 1;
- $options->{ "database" } = "$ENV{ 'BP_DATA' }/genomes/$options->{ 'genome' }/blast/$options->{ 'genome' }.fna"if $options->{ 'genome' };
+ $options->{ "database" } = "$ENV{ 'BP_DATA' }/genomes/$options->{ 'genome' }/blast/$options->{ 'genome' }.fna" if $options->{ 'genome' };
$tmp_in = "$BP_TMP/blast_query.seq";
$tmp_out = "$BP_TMP/blast.result";
}
+sub script_soap_seq
+{
+ # Martin A. Hansen, July 2008.
+
+ # soap sequences in stream against a given file or genome.
+
+ my ( $in, # handle to in stream
+ $out, # handle to out stream
+ $options, # options hash
+ ) = @_;
+
+ # Returns nothing.
+
+ my ( $genome, $tmp_in, $tmp_out, $fh_in, $fh_out, $record, $line, @fields );
+
+ $options->{ "mismatches" } ||= 2;
+ $options->{ "gap_size" } ||= 0;
+ $options->{ "cpus" } ||= 1;
+
+ $options->{ "in_file" } = "$ENV{ 'BP_DATA' }/genomes/$options->{ 'genome' }/fasta/$options->{ 'genome' }.fna" if $options->{ 'genome' };
+
+ $tmp_in = "$BP_TMP/soap_query.seq";
+ $tmp_out = "$BP_TMP/soap.result";
+
+ $fh_out = Maasha::Common::write_open( $tmp_in );
+
+ while ( $record = get_record( $in ) )
+ {
+ if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } ) {
+ Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh_out );
+ }
+
+ put_record( $record, $out );
+ }
+
+ close $fh_out;
+
+ Maasha::Common::run( "soap", "-r 2 -a $tmp_in -v $options->{ 'mismatches' } -g $options->{ 'gap_size'} -p $options->{ 'cpus' } -d $options->{ 'in_file' } -o $tmp_out > /dev/null 2>&1", 1 );
+
+ unlink $tmp_in;
+
+ $fh_out = Maasha::Common::read_open( $tmp_out );
+
+ undef $record;
+
+ while ( $line = <$fh_out> )
+ {
+ chomp $line;
+
+ @fields = split /\t/, $line;
+
+ $record->{ "REC_TYPE" } = "SOAP";
+ $record->{ "Q_ID" } = $fields[ 0 ];
+ $record->{ "SCORE" } = $fields[ 3 ];
+ $record->{ "STRAND" } = $fields[ 6 ];
+ $record->{ "S_ID" } = $fields[ 7 ];
+ $record->{ "S_BEG" } = $fields[ 8 ] - 1; # soap is one based
+ $record->{ "S_END" } = $fields[ 8 ] + $fields[ 5 ] - 2;
+
+ put_record( $record, $out );
+ }
+
+ close $fh_out;
+
+ unlink $tmp_out;
+}
+
+
sub script_match_seq
{
# Martin A. Hansen, August 2007.
}
-sub script_create_fixedstep_index
-{
- # Martin A. Hansen, January 2008.
-
- # Create a fixedStep index from records in the stream.
-
- my ( $in, # handle to in stream
- $out, # handle to out stream
- $options, # options hash
- ) = @_;
-
- # Returns nothing.
-
- my ( $record, $file_tmp, $fh_tmp, $vals, $index );
-
- $file_tmp = "$BP_TMP/fixedstep.tmp";
-
- $fh_tmp = Maasha::Common::write_open( $file_tmp );
-
- while ( $record = get_record( $in ) )
- {
- if ( $record->{ "CHR" } and $record->{ "CHR_BEG" } and $record->{ "STEP" } and $record->{ "VALS" } )
- {
- print $fh_tmp "fixedStep chrom=$record->{ 'CHR' } start=$record->{ 'CHR_BEG' } step=$record->{ 'STEP' }\n";
-
- $vals = $record->{ 'VALS' };
-
- $vals =~ tr/,/\n/;
-
- print $fh_tmp "$vals\n";
- }
- }
-
- close $fh_tmp;
-
- $index = Maasha::UCSC::fixedstep_index_create( $file_tmp );
-
- unlink $file_tmp;
-
- Maasha::UCSC::fixedstep_index_store( $options->{ 'index_name' }, $index );
-}
-
-
sub script_vmatch_seq
{
# Martin A. Hansen, August 2007.
Maasha::UCSC::bed_put_entry( $new_record, $fh, 6 );
}
+ elsif ( $record->{ "REC_TYPE" } eq "SOAP" and $record->{ "S_ID" } =~ /^chr/i ) # ---- Hits from Vmatch ----
+ {
+ $new_record->{ "CHR" } = $record->{ "S_ID" };
+ $new_record->{ "CHR_BEG" } = $record->{ "S_BEG" };
+ $new_record->{ "CHR_END" } = $record->{ "S_END" };
+ $new_record->{ "Q_ID" } = $record->{ "Q_ID" };
+ $new_record->{ "SCORE" } = $record->{ "SCORE" } || 999;
+ $new_record->{ "STRAND" } = $record->{ "STRAND" };
+
+ Maasha::UCSC::bed_put_entry( $new_record, $fh, 6 );
+ }
elsif ( $record->{ "CHR" } and defined $record->{ "CHR_BEG" } and $record->{ "CHR_END" } ) # ---- Generic data from tables ----
{
Maasha::UCSC::bed_put_entry( $record, $fh );
$wig_file = "$options->{ 'table' }.wig";
$wib_file = "$options->{ 'table' }.wib";
- $wib_dir = "$ENV{ 'BP_DATA' }/genomes/$options->{ 'database' }/wib";
+ $wib_dir = "$ENV{ 'HOME' }/ucsc/wib";
Maasha::Common::dir_create_if_not_exists( $wib_dir );