From: martinahansen Date: Mon, 21 Jul 2008 05:17:55 +0000 (+0000) Subject: added soap_seq X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=09ac3b416c60618e4ceb372d77ba51a798a0ad41;p=biopieces.git added soap_seq git-svn-id: http://biopieces.googlecode.com/svn/trunk@180 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/soap_seq b/bp_bin/soap_seq new file mode 100755 index 0000000..4cd1d44 --- /dev/null +++ b/bp_bin/soap_seq @@ -0,0 +1,6 @@ +#!/usr/bin/env perl + +use warnings; +use strict; + +use Maasha::Biopieces; diff --git a/code_perl/Maasha/Biopieces.pm b/code_perl/Maasha/Biopieces.pm index e883884..ce93a0e 100644 --- a/code_perl/Maasha/Biopieces.pm +++ b/code_perl/Maasha/Biopieces.pm @@ -216,6 +216,7 @@ sub run_script elsif ( $script eq "create_blast_db" ) { script_create_blast_db( $in, $out, $options ) } elsif ( $script eq "blast_seq" ) { script_blast_seq( $in, $out, $options ) } elsif ( $script eq "blat_seq" ) { script_blat_seq( $in, $out, $options ) } + elsif ( $script eq "soap_seq" ) { script_soap_seq( $in, $out, $options ) } elsif ( $script eq "match_seq" ) { script_match_seq( $in, $out, $options ) } elsif ( $script eq "create_vmatch_index" ) { script_create_vmatch_index( $in, $out, $options ) } elsif ( $script eq "vmatch_seq" ) { script_vmatch_seq( $in, $out, $options ) } @@ -569,6 +570,14 @@ sub get_options ooc|c ); } + elsif ( $script eq "soap_seq" ) + { + @options = qw( + in_file|i=s + genome|g=s + cpus|c=s + ); + } elsif ( $script eq "match_seq" ) { @options = qw( @@ -1007,7 +1016,9 @@ sub get_options Maasha::Common::error( qq(no --database or --genome specified) ) if $script eq "blast_seq" and not $options{ "genome" } and not $options{ "database" }; Maasha::Common::error( qq(both --database and --genome specified) ) if $script eq "blast_seq" and $options{ "genome" } and $options{ "database" }; Maasha::Common::error( qq(no --index_name or --genome specified) ) if $script eq "vmatch_seq" and not $options{ "genome" } and not $options{ "index_name" }; - Maasha::Common::error( qq(both --index and --genome specified) ) if $script eq "vmatch_seq" and $options{ "genome" } and $options{ "index" }; + Maasha::Common::error( qq(both --index and --genome specified) ) if $script eq "vmatch_seq" and $options{ "genome" } and $options{ "index_name" }; + Maasha::Common::error( qq(no --in_file or --genome specified) ) if $script eq "soap_seq" and not $options{ "genome" } and not $options{ "in_file" }; + Maasha::Common::error( qq(both --in_file and --genome specified) ) if $script eq "soap_seq" and $options{ "genome" } and $options{ "in_file" }; Maasha::Common::error( qq(no --genome specified) ) if $script =~ /format_genome|get_genome_seq|get_genome_align|get_genome_phastcons|blat_seq|plot_phastcons_profiles|plot_karyogram/ and not $options{ "genome" }; Maasha::Common::error( qq(no --key specified) ) if $script =~ /plot_lendist|plot_histogram/ and not $options{ "key" }; Maasha::Common::error( qq(no --keys speficied) ) if $script =~ /sort_records|count_vals|sum_vals|mean_vals|median_vals|length_vals/ and not $options{ "keys" }; @@ -3404,7 +3415,7 @@ sub script_blast_seq $options->{ "filter" } = "T" if $options->{ "filter" }; $options->{ "cpus" } ||= 1; - $options->{ "database" } = "$ENV{ 'BP_DATA' }/genomes/$options->{ 'genome' }/blast/$options->{ 'genome' }.fna"if $options->{ 'genome' }; + $options->{ "database" } = "$ENV{ 'BP_DATA' }/genomes/$options->{ 'genome' }/blast/$options->{ 'genome' }.fna" if $options->{ 'genome' }; $tmp_in = "$BP_TMP/blast_query.seq"; $tmp_out = "$BP_TMP/blast.result"; @@ -3558,6 +3569,94 @@ sub script_blat_seq } +sub script_soap_seq +{ + # Martin A. Hansen, July 2008. + + # soap sequences in stream against a given file or genome. + + my ( $in, # handle to in stream + $out, # handle to out stream + $options, # options hash + ) = @_; + + # Returns nothing. + + my ( $genome, $tmp_in, $tmp_out, $fh_in, $fh_out, $record, $line, @fields ); + + $options->{ "cpus" } ||= 1; + + $options->{ "in_file" } = "$ENV{ 'BP_DATA' }/genomes/$options->{ 'genome' }/fasta/$options->{ 'genome' }.fna" if $options->{ 'genome' }; + + $tmp_in = "$BP_TMP/soap_query.seq"; + $tmp_out = "$BP_TMP/soap.result"; + + $fh_out = Maasha::Common::write_open( $tmp_in ); + + while ( $record = get_record( $in ) ) + { + if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } ) { + Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh_out ); + } + + put_record( $record, $out ); + } + + close $fh_out; + + Maasha::Common::run( "soap", "-r 2 -a $tmp_in -p $options->{ 'cpus' } -d $options->{ 'in_file' } -o $tmp_out > /dev/null 2>&1", 1 ); + + unlink $tmp_in; + + $fh_out = Maasha::Common::read_open( $tmp_out ); + + undef $record; + + while ( $line = <$fh_out> ) + { + chomp $line; + + +print Dumper( $line ); + + next if $line =~ /^#/; + + @fields = split /\s+/, $line; + + $record->{ "REC_TYPE" } = "BLAST"; + $record->{ "Q_ID" } = $fields[ 0 ]; + $record->{ "S_ID" } = $fields[ 1 ]; + $record->{ "IDENT" } = $fields[ 2 ]; + $record->{ "ALIGN_LEN" } = $fields[ 3 ]; + $record->{ "MISMATCHES" } = $fields[ 4 ]; + $record->{ "GAPS" } = $fields[ 5 ]; + $record->{ "Q_BEG" } = $fields[ 6 ] - 1; # BLAST is 1-based + $record->{ "Q_END" } = $fields[ 7 ] - 1; # BLAST is 1-based + $record->{ "S_BEG" } = $fields[ 8 ] - 1; # BLAST is 1-based + $record->{ "S_END" } = $fields[ 9 ] - 1; # BLAST is 1-based + $record->{ "E_VAL" } = $fields[ 10 ]; + $record->{ "BIT_SCORE" } = $fields[ 11 ]; + + if ( $record->{ "S_BEG" } > $record->{ "S_END" } ) + { + $record->{ "STRAND" } = '-'; + + ( $record->{ "S_BEG" }, $record->{ "S_END" } ) = ( $record->{ "S_END" }, $record->{ "S_BEG" } ); + } + else + { + $record->{ "STRAND" } = '+'; + } + + put_record( $record, $out ); + } + + close $fh_out; + + unlink $tmp_out; +} + + sub script_match_seq { # Martin A. Hansen, August 2007.