elsif ( $script eq "create_blast_db" ) { script_create_blast_db( $in, $out, $options ) }
elsif ( $script eq "blast_seq" ) { script_blast_seq( $in, $out, $options ) }
elsif ( $script eq "blat_seq" ) { script_blat_seq( $in, $out, $options ) }
+ elsif ( $script eq "soap_seq" ) { script_soap_seq( $in, $out, $options ) }
elsif ( $script eq "match_seq" ) { script_match_seq( $in, $out, $options ) }
elsif ( $script eq "create_vmatch_index" ) { script_create_vmatch_index( $in, $out, $options ) }
elsif ( $script eq "vmatch_seq" ) { script_vmatch_seq( $in, $out, $options ) }
ooc|c
);
}
+ elsif ( $script eq "soap_seq" )
+ {
+ @options = qw(
+ in_file|i=s
+ genome|g=s
+ cpus|c=s
+ );
+ }
elsif ( $script eq "match_seq" )
{
@options = qw(
Maasha::Common::error( qq(no --database or --genome specified) ) if $script eq "blast_seq" and not $options{ "genome" } and not $options{ "database" };
Maasha::Common::error( qq(both --database and --genome specified) ) if $script eq "blast_seq" and $options{ "genome" } and $options{ "database" };
Maasha::Common::error( qq(no --index_name or --genome specified) ) if $script eq "vmatch_seq" and not $options{ "genome" } and not $options{ "index_name" };
- Maasha::Common::error( qq(both --index and --genome specified) ) if $script eq "vmatch_seq" and $options{ "genome" } and $options{ "index" };
+ Maasha::Common::error( qq(both --index and --genome specified) ) if $script eq "vmatch_seq" and $options{ "genome" } and $options{ "index_name" };
+ Maasha::Common::error( qq(no --in_file or --genome specified) ) if $script eq "soap_seq" and not $options{ "genome" } and not $options{ "in_file" };
+ Maasha::Common::error( qq(both --in_file and --genome specified) ) if $script eq "soap_seq" and $options{ "genome" } and $options{ "in_file" };
Maasha::Common::error( qq(no --genome specified) ) if $script =~ /format_genome|get_genome_seq|get_genome_align|get_genome_phastcons|blat_seq|plot_phastcons_profiles|plot_karyogram/ and not $options{ "genome" };
Maasha::Common::error( qq(no --key specified) ) if $script =~ /plot_lendist|plot_histogram/ and not $options{ "key" };
Maasha::Common::error( qq(no --keys speficied) ) if $script =~ /sort_records|count_vals|sum_vals|mean_vals|median_vals|length_vals/ and not $options{ "keys" };
$options->{ "filter" } = "T" if $options->{ "filter" };
$options->{ "cpus" } ||= 1;
- $options->{ "database" } = "$ENV{ 'BP_DATA' }/genomes/$options->{ 'genome' }/blast/$options->{ 'genome' }.fna"if $options->{ 'genome' };
+ $options->{ "database" } = "$ENV{ 'BP_DATA' }/genomes/$options->{ 'genome' }/blast/$options->{ 'genome' }.fna" if $options->{ 'genome' };
$tmp_in = "$BP_TMP/blast_query.seq";
$tmp_out = "$BP_TMP/blast.result";
}
+sub script_soap_seq
+{
+ # Martin A. Hansen, July 2008.
+
+ # soap sequences in stream against a given file or genome.
+
+ my ( $in, # handle to in stream
+ $out, # handle to out stream
+ $options, # options hash
+ ) = @_;
+
+ # Returns nothing.
+
+ my ( $genome, $tmp_in, $tmp_out, $fh_in, $fh_out, $record, $line, @fields );
+
+ $options->{ "cpus" } ||= 1;
+
+ $options->{ "in_file" } = "$ENV{ 'BP_DATA' }/genomes/$options->{ 'genome' }/fasta/$options->{ 'genome' }.fna" if $options->{ 'genome' };
+
+ $tmp_in = "$BP_TMP/soap_query.seq";
+ $tmp_out = "$BP_TMP/soap.result";
+
+ $fh_out = Maasha::Common::write_open( $tmp_in );
+
+ while ( $record = get_record( $in ) )
+ {
+ if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } ) {
+ Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh_out );
+ }
+
+ put_record( $record, $out );
+ }
+
+ close $fh_out;
+
+ Maasha::Common::run( "soap", "-r 2 -a $tmp_in -p $options->{ 'cpus' } -d $options->{ 'in_file' } -o $tmp_out > /dev/null 2>&1", 1 );
+
+ unlink $tmp_in;
+
+ $fh_out = Maasha::Common::read_open( $tmp_out );
+
+ undef $record;
+
+ while ( $line = <$fh_out> )
+ {
+ chomp $line;
+
+
+print Dumper( $line );
+
+ next if $line =~ /^#/;
+
+ @fields = split /\s+/, $line;
+
+ $record->{ "REC_TYPE" } = "BLAST";
+ $record->{ "Q_ID" } = $fields[ 0 ];
+ $record->{ "S_ID" } = $fields[ 1 ];
+ $record->{ "IDENT" } = $fields[ 2 ];
+ $record->{ "ALIGN_LEN" } = $fields[ 3 ];
+ $record->{ "MISMATCHES" } = $fields[ 4 ];
+ $record->{ "GAPS" } = $fields[ 5 ];
+ $record->{ "Q_BEG" } = $fields[ 6 ] - 1; # BLAST is 1-based
+ $record->{ "Q_END" } = $fields[ 7 ] - 1; # BLAST is 1-based
+ $record->{ "S_BEG" } = $fields[ 8 ] - 1; # BLAST is 1-based
+ $record->{ "S_END" } = $fields[ 9 ] - 1; # BLAST is 1-based
+ $record->{ "E_VAL" } = $fields[ 10 ];
+ $record->{ "BIT_SCORE" } = $fields[ 11 ];
+
+ if ( $record->{ "S_BEG" } > $record->{ "S_END" } )
+ {
+ $record->{ "STRAND" } = '-';
+
+ ( $record->{ "S_BEG" }, $record->{ "S_END" } ) = ( $record->{ "S_END" }, $record->{ "S_BEG" } );
+ }
+ else
+ {
+ $record->{ "STRAND" } = '+';
+ }
+
+ put_record( $record, $out );
+ }
+
+ close $fh_out;
+
+ unlink $tmp_out;
+}
+
+
sub script_match_seq
{
# Martin A. Hansen, August 2007.