3 # Copyright (C) 2007 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 # Routines to match sequences
28 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
34 use Storable qw( dclone );
39 use Maasha::Biopieces;
40 use vars qw ( @ISA @EXPORT );
47 @ISA = qw( Exporter );
50 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
55 # Martin A. Hansen, June 2007.
57 # Match sequences using MUMmer.
59 my ( $entries1, # FASTA entries
60 $entries2, # FASTA entries
61 $options, # additional MUMmer options - OPTIONAL
62 $tmp_dir, # temporary directory
67 my ( @args, $arg, $file_in1, $file_in2, $cmd, $file_out, $fh, $line, $result, @results );
69 $tmp_dir ||= $ENV{ "BP_TMP" };
71 $options->{ "word_size" } ||= 20;
72 $options->{ "direction" } ||= "both";
77 push @args, "-l $options->{ 'word_size' }";
78 push @args, "-maxmatch";
79 push @args, "-n" if not Maasha::Seq::seq_guess_type( $entries1->[ 0 ]->[ 1 ] ) eq "PROTEIN";
80 push @args, "-b" if $options->{ "direction" } =~ /^b/;
81 push @args, "-r" if $options->{ "direction" } =~ /^r/;
83 $arg = join " ", @args;
85 $file_in1 = "$tmp_dir/muscle1.tmp";
86 $file_in2 = "$tmp_dir/muscle2.tmp";
87 $file_out = "$tmp_dir/muscle3.tmp";
89 map { $_->[ 0 ] =~ tr/ /_/ } @{ $entries1 };
90 map { $_->[ 0 ] =~ tr/ /_/ } @{ $entries2 };
92 Maasha::Fasta::put_entries( $entries1, $file_in1 );
93 Maasha::Fasta::put_entries( $entries2, $file_in2 );
95 Maasha::Common::run( "mummer", "$arg $file_in1 $file_in2 > $file_out 2>/dev/null" );
97 $fh = Maasha::Filesys::file_read_open( $file_out );
99 while ( $line = <$fh> )
103 if ( $line =~ /^> (.+)Reverse\s+Len = (\d+)$/ )
105 $result->{ "Q_ID" } = $1;
106 $result->{ "Q_LEN" } = $2;
107 $result->{ "DIR" } = "reverse";
109 elsif ( $line =~ /^> (.+)Len = (\d+)$/ )
111 $result->{ "Q_ID" } = $1;
112 $result->{ "Q_LEN" } = $2;
113 $result->{ "DIR" } = "forward";
115 elsif ( $line =~ /^\s*(.\S+)\s+(\d+)\s+(\d+)\s+(\d+)$/ )
117 $result->{ "S_ID" } = $1;
118 $result->{ "S_BEG" } = $2 - 1;
119 $result->{ "Q_BEG" } = $3 - 1;
120 $result->{ "HIT_LEN" } = $4;
121 $result->{ "S_END" } = $result->{ "S_BEG" } + $result->{ "HIT_LEN" } - 1;
122 $result->{ "Q_END" } = $result->{ "Q_BEG" } + $result->{ "HIT_LEN" } - 1;
124 push @results, dclone $result;
132 return wantarray ? @results : \@results;
138 # Martin A. Hansen, July 2008.
140 # Use mkvtree to create a vmatch index of a given file
142 my ( $file, # FASTA file to index
143 $src_dir, # source directory with FASTA file
144 $dst_dir, # distination directory for Vmatch index
145 $tmp_dir, # temp directory - OPTIONAL
150 my ( $fh, $entry, $tmp_file );
152 Maasha::Filesys::dir_create_if_not_exists( $dst_dir );
154 # if ( Maasha::Common::file_size( $file ) < 200_000_000 )
156 # &Maasha::Common::run( "mkvtree", "-db $src_dir/$file -dna -pl -allout -indexname $dst_dir/$file > /dev/null 3>&1" );
160 $fh = Maasha::Filesys::file_read_open( "$src_dir/$file" );
162 while ( $entry = Maasha::Fasta::get_entry( $fh ) )
164 $tmp_file = $entry->[ SEQ_NAME ] . ".fna";
166 Maasha::Fasta::put_entries( [ $entry ], "$tmp_dir/$tmp_file" );
168 Maasha::Common::run( "mkvtree", "-db $tmp_dir/$tmp_file -dna -pl -allout -indexname $dst_dir/$tmp_file > /dev/null 3>&1" );
170 unlink "$tmp_dir/$tmp_file";
177 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<