# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+use warnings;
use strict;
use Data::Dumper;
use Storable qw( dclone );
use Maasha::Common;
+use Maasha::Filesys;
use Maasha::Fasta;
use Maasha::Seq;
use Maasha::Biopieces;
push @args, "-F";
push @args, "-l $options->{ 'word_size' }";
push @args, "-maxmatch";
- push @args, "-n" if not Maasha::Seq::seq_guess_type( $entries1->[ 0 ]->[ 1 ] ) eq "protein";
+ push @args, "-n" if not Maasha::Seq::seq_guess_type( $entries1->[ 0 ]->[ 1 ] ) eq "PROTEIN";
push @args, "-b" if $options->{ "direction" } =~ /^b/;
push @args, "-r" if $options->{ "direction" } =~ /^r/;
Maasha::Common::run( "mummer", "$arg $file_in1 $file_in2 > $file_out 2>/dev/null" );
- $fh = Maasha::Common::read_open( $file_out );
+ $fh = Maasha::Filesys::file_read_open( $file_out );
while ( $line = <$fh> )
{
push @results, dclone $result;
}
-
}
unlink $file_in1;
}
-sub match_vmatch
-{
- # Martin A. Hansen, April 2008.
-
- # Vmatches a list of records against a list of index files and the full
- # path to the result file is returned.
-
- my ( $tmp_dir, # directory in where to save temp files
- $records, # list of records
- $index_files, # list of index files
- $options, # argument hash
- ) = @_;
-
- # Returns a string.
-
- my ( $query_file, $result_file, @result_files, $fh_in, $fh_out, $line, @fields, $i, $record, $vmatch_args, @index_names, @seq_names, $count_list, $entry );
-
- $query_file = "$tmp_dir/query.seq";
- $result_file = "$tmp_dir/vmatch.out";
-
- $fh_out = Maasha::Common::write_open( $query_file );
-
- foreach $record ( @{ $records } )
- {
- if ( $entry = Maasha::Fasta::biopiece2fasta( $record ) )
- {
- next if length $entry->[ SEQ ] < 12; # assuming that the index is created for 12 as minimum length
-
- push @seq_names, $entry->[ SEQ_NAME ];
-
- Maasha::Fasta::put_entry( $entry, $fh_out, 100 );
- }
- }
-
- close $fh_out;
-
- if ( $options->{ 'genome' } ) {
- $vmatch_args = "-complete -d -p -q $query_file";
- } else {
- $vmatch_args = "-complete -d -p -showdesc 100 -q $query_file";
- }
-
- $vmatch_args .= " -h " . $options->{ "hamming_dist" } if $options->{ "hamming_dist" };
- $vmatch_args .= " -e " . $options->{ "edit_dist" } if $options->{ "edit_dist" };
-
- for ( $i = 0; $i < @{ $index_files }; $i++ )
- {
- Maasha::Common::run( "vmatch", "$vmatch_args $index_files->[ $i ] > $result_file.$i" );
-
- push @result_files, "$result_file.$i";
- }
-
- unlink $query_file;
-
- $count_list = vmatch_count_hits( \@result_files ) if ( $options->{ "count" } );
-
- $fh_out = Maasha::Common::write_open( $result_file );
-
- for ( $i = 0; $i < @{ $index_files }; $i++ )
- {
- $index_files->[ $i ] =~ s/.+\/(.+)\.fna$/$1/ if $options->{ 'genome' };
-
- $fh_in = Maasha::Common::read_open( "$result_file.$i" );
-
- while ( $line = <$fh_in> )
- {
- chomp $line;
-
- next if $line =~ /^#/;
-
- @fields = split " ", $line;
-
- next if $options->{ "max_hits" } and $count_list->[ $fields[ 5 ] ] > $options->{ 'max_hits' };
-
- $fields[ 1 ] = $index_files->[ $i ]; # S_ID
- $fields[ 9 ] = $count_list->[ $fields[ 5 ] ] if $options->{ "count" }; # SCORE
- $fields[ 5 ] = $seq_names[ $fields[ 5 ] ]; # Q_ID
-
- print $fh_out join( "\t", @fields ), "\n";
- }
-
- close $fh_in;
-
- unlink "$result_file.$i";
- }
-
- close $fh_out;
-
- return $result_file;
-}
-
-
-sub vmatch_count_hits
-{
- # Martin A. Hansen, April 2008.
-
- # Given a list of Vmatch result file, count duplications based
- # on q_id. The counts are returned in a list where the list index
- # corresponds to the q_id index in the query file.
-
- my ( $files, # vmatch result files
- ) = @_;
-
- # Returns a list.
-
- my ( $file, $fh_in, $line, @fields, @count_list );
-
- foreach $file ( @{ $files } )
- {
- $fh_in = Maasha::Common::read_open( $file );
-
- while ( $line = <$fh_in> )
- {
- chomp $line;
-
- next if $line =~ /^#/;
-
- @fields = split " ", $line;
-
- $count_list[ $fields[ 5 ] ]++;
- }
-
- close $fh_in;
- }
-
- return wantarray ? @count_list : \@count_list;
-}
-
-
-sub vmatch_get_entry
-{
- # Martin A. Hansen, January 2008.
-
- # Parses vmatch output records.
-
- my ( $fh, # file handle to vmatch result file.
- ) = @_;
-
- # Returns a hash.
-
- my ( $line, @fields, %record );
-
- while ( $line = <$fh> )
- {
- chomp $line;
-
- next if $line =~ /^#/;
-
- @fields = split "\t", $line;
-
- $record{ "REC_TYPE" } = "VMATCH";
-
- $record{ "S_LEN" } = $fields[ 0 ];
- $record{ "S_ID" } = $fields[ 1 ];
- $record{ "S_BEG" } = $fields[ 2 ];
-
- if ( $fields[ 3 ] eq "D" ) {
- $record{ "STRAND" } = "+";
- } else {
- $record{ "STRAND" } = "-";
- }
-
- $record{ "Q_LEN" } = $fields[ 4 ];
- $record{ "Q_ID" } = $fields[ 5 ];
- $record{ "Q_BEG" } = $fields[ 6 ];
- $record{ "MATCH_DIST" } = $fields[ 7 ];
- $record{ "E_VAL" } = $fields[ 8 ];
- $record{ "SCORE" } = $fields[ 9 ];
- $record{ "IDENT" } = $fields[ 10 ];
-
- $record{ "Q_END" } = $record{ "Q_BEG" } + $record{ "Q_LEN" } - 1;
- $record{ "S_END" } = $record{ "S_BEG" } + $record{ "S_LEN" } - 1;
-
- return wantarray ? %record : \%record;
- }
-}
-
-
sub vmatch_index
{
# Martin A. Hansen, July 2008.
my ( $fh, $entry, $tmp_file );
- Maasha::Common::dir_create_if_not_exists( $dst_dir );
+ Maasha::Filesys::dir_create_if_not_exists( $dst_dir );
# if ( Maasha::Common::file_size( $file ) < 200_000_000 )
# {
# }
# else
# {
- $fh = Maasha::Common::read_open( "$src_dir/$file" );
+ $fh = Maasha::Filesys::file_read_open( "$src_dir/$file" );
while ( $entry = Maasha::Fasta::get_entry( $fh ) )
{
$tmp_file = $entry->[ SEQ_NAME ] . ".fna";
- Maasha::Fasta::put_entries( [ $entry ], "$tmp_dir/$tmp_file" )
+ Maasha::Fasta::put_entries( [ $entry ], "$tmp_dir/$tmp_file" );
- &Maasha::Common::run( "mkvtree", "-db $tmp_dir/$tmp_file -dna -pl -allout -indexname $dst_dir/$tmp_file > /dev/null 3>&1" );
+ Maasha::Common::run( "mkvtree", "-db $tmp_dir/$tmp_file -dna -pl -allout -indexname $dst_dir/$tmp_file > /dev/null 3>&1" );
unlink "$tmp_dir/$tmp_file";
}