]> git.donarmstrong.com Git - mothur.git/blob - pairwiseseqscommand.cpp
added current as option is lists of file names, processors now outputted with current...
[mothur.git] / pairwiseseqscommand.cpp
1 /*
2  *  pairwiseseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/20/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "pairwiseseqscommand.h"
11 #include "sequence.hpp"
12
13 #include "gotohoverlap.hpp"
14 #include "needlemanoverlap.hpp"
15 #include "blastalign.hpp"
16 #include "noalign.hpp"
17
18 #include "ignoregaps.h"
19 #include "eachgapdist.h"
20 #include "eachgapignore.h"
21 #include "onegapdist.h"
22 #include "onegapignore.h"
23
24
25 //**********************************************************************************************************************
26 vector<string> PairwiseSeqsCommand::setParameters(){    
27         try {
28                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
29                 CommandParameter palign("align", "Multiple", "needleman-gotoh-blast-noalign", "needleman", "", "", "",false,false); parameters.push_back(palign);
30                 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch);
31                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
32                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
33                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
34                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
35                 CommandParameter poutput("output", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(poutput);
36                 CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "",false,false); parameters.push_back(pcalc);
37                 CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pcountends);
38                 CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcompress);
39                 CommandParameter pcutoff("cutoff", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pcutoff);
40                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
41                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
42                 
43                 vector<string> myArray;
44                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "PairwiseSeqsCommand", "setParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 string PairwiseSeqsCommand::getHelpString(){    
54         try {
55                 string helpString = "";
56                 helpString += "The pairwise.seqs command reads a fasta file and creates distance matrix.\n";
57                 helpString += "The pairwise.seqs command parameters are fasta, align, match, mismatch, gapopen, gapextend, calc, output, cutoff and processors.\n";
58                 helpString += "The fasta parameter is required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
59                 helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n";
60                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
61                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
62                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
63                 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
64                 helpString += "The calc parameter allows you to specify the method of calculating the distances.  Your options are: nogaps, onegap or eachgap. The default is onegap.\n";
65                 helpString += "The countends parameter allows you to specify whether to include terminal gaps in distance.  Your options are: T or F. The default is T.\n";
66                 helpString += "The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0.\n";
67                 helpString += "The output parameter allows you to specify format of your distance matrix. Options are column, lt, and square. The default is column.\n";
68                 helpString += "The compress parameter allows you to indicate that you want the resulting distance file compressed.  The default is false.\n";
69                 helpString += "The pairwise.seqs command should be in the following format: \n";
70                 helpString += "pairwise.seqs(fasta=yourfastaFile, align=yourAlignmentMethod) \n";
71                 helpString += "Example pairwise.seqs(fasta=candidate.fasta, align=blast)\n";
72                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
73                 return helpString;
74         }
75         catch(exception& e) {
76                 m->errorOut(e, "PairwiseSeqsCommand", "getHelpString");
77                 exit(1);
78         }
79 }
80
81 //**********************************************************************************************************************
82 PairwiseSeqsCommand::PairwiseSeqsCommand(){     
83         try {
84                 abort = true; calledHelp = true; 
85                 setParameters();
86                 vector<string> tempOutNames;
87                 outputTypes["phylip"] = tempOutNames;
88                 outputTypes["column"] = tempOutNames;
89         }
90         catch(exception& e) {
91                 m->errorOut(e, "PairwiseSeqsCommand", "PairwiseSeqsCommand");
92                 exit(1);
93         }
94 }
95 //**********************************************************************************************************************
96 PairwiseSeqsCommand::PairwiseSeqsCommand(string option)  {
97         try {
98                 abort = false; calledHelp = false;   
99         
100                 //allow user to run help
101                 if(option == "help") { help(); abort = true; calledHelp = true; }
102                 
103                 else {
104                         vector<string> myArray = setParameters();
105                         
106                         OptionParser parser(option);
107                         map<string, string> parameters = parser.getParameters(); 
108                         
109                         ValidParameters validParameter("pairwise.seqs");
110                         map<string, string>::iterator it;
111                         
112                         //check to make sure all parameters are valid for command
113                         for (it = parameters.begin(); it != parameters.end(); it++) { 
114                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
115                         }
116                         
117                         //initialize outputTypes
118                         vector<string> tempOutNames;
119                         outputTypes["phylip"] = tempOutNames;
120                         outputTypes["column"] = tempOutNames;
121                         
122                         //if the user changes the output directory command factory will send this info to us in the output parameter 
123                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
124                         
125
126                         //if the user changes the input directory command factory will send this info to us in the output parameter 
127                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
128                         
129                         if (inputDir == "not found"){   inputDir = "";          }
130
131                         fastaFileName = validParameter.validFile(parameters, "fasta", false);
132                         if (fastaFileName == "not found") {                             
133                                 //if there is a current fasta file, use it
134                                 string filename = m->getFastaFile(); 
135                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
136                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
137                         }
138                         else { 
139                                 m->splitAtDash(fastaFileName, fastaFileNames);
140                                 
141                                 //go through files and make sure they are good, if not, then disregard them
142                                 for (int i = 0; i < fastaFileNames.size(); i++) {
143                                         
144                                         bool ignore = false;
145                                         if (fastaFileNames[i] == "current") { 
146                                                 fastaFileNames[i] = m->getFastaFile(); 
147                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
148                                                 else {  
149                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
150                                                         //erase from file list
151                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
152                                                         i--;
153                                                 }
154                                         }
155                                         
156                                         if (!ignore) {
157                                         
158                                                 if (inputDir != "") {
159                                                         string path = m->hasPath(fastaFileNames[i]);
160                                                         //if the user has not given a path then, add inputdir. else leave path alone.
161                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
162                                                 }
163                 
164                                                 int ableToOpen;
165                                                 ifstream in;
166
167                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
168                                         
169                                                 //if you can't open it, try default location
170                                                 if (ableToOpen == 1) {
171                                                         if (m->getDefaultPath() != "") { //default path is set
172                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
173                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
174                                                                 ifstream in2;
175                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
176                                                                 in2.close();
177                                                                 fastaFileNames[i] = tryPath;
178                                                         }
179                                                 }
180                                                 
181                                                 //if you can't open it, try output location
182                                                 if (ableToOpen == 1) {
183                                                         if (m->getOutputDir() != "") { //default path is set
184                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
185                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
186                                                                 ifstream in2;
187                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
188                                                                 in2.close();
189                                                                 fastaFileNames[i] = tryPath;
190                                                         }
191                                                 }
192                                                 
193                                                 in.close();                                     
194
195                                                 if (ableToOpen == 1) { 
196                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
197                                                         //erase from file list
198                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
199                                                         i--;
200                                                 }
201                                         }
202                                 }
203                                 
204                                 //make sure there is at least one valid file left
205                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
206                         }
207                 
208                         //check for optional parameter and set defaults
209                         // ...at some point should added some additional type checking...
210                         string temp;
211                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
212                         convert(temp, match);  
213                         
214                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
215                         convert(temp, misMatch);  
216                         
217                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
218                         convert(temp, gapOpen);  
219                         
220                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
221                         convert(temp, gapExtend); 
222                         
223                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
224                         m->setProcessors(temp);
225                         convert(temp, processors);
226                         
227                         temp = validParameter.validFile(parameters, "cutoff", false);           if(temp == "not found"){        temp = "1.0"; }
228                         convert(temp, cutoff); 
229                         
230                         temp = validParameter.validFile(parameters, "countends", false);        if(temp == "not found"){        temp = "T";     }
231                         countends = m->isTrue(temp); 
232                         
233                         temp = validParameter.validFile(parameters, "compress", false);         if(temp == "not found"){  temp = "F"; }
234                         compress = m->isTrue(temp); 
235                         
236                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
237                         
238                         output = validParameter.validFile(parameters, "output", false);         if(output == "not found"){      output = "column"; }
239                         if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; }
240                         
241                         calc = validParameter.validFile(parameters, "calc", false);                     
242                         if (calc == "not found") { calc = "onegap";  }
243                         else { 
244                                  if (calc == "default")  {  calc = "onegap";  }
245                         }
246                         m->splitAtDash(calc, Estimators);
247                         
248                         ValidCalculators validCalculator;
249                         if (countends) {
250                                 for (int i=0; i<Estimators.size(); i++) {
251                                         if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
252                                                 if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
253                                                 else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
254                                                 else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
255                                         }
256                                 }
257                         }else {
258                                 for (int i=0; i<Estimators.size(); i++) {
259                                         if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
260                                                 if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
261                                                 else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
262                                                 else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
263                                         }
264                                 }
265                         }
266                 }
267                 
268         }
269         catch(exception& e) {
270                 m->errorOut(e, "PairwiseSeqsCommand", "PairwiseSeqsCommand");
271                 exit(1);
272         }
273 }
274 //**********************************************************************************************************************
275
276 int PairwiseSeqsCommand::execute(){
277         try {
278                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
279                 
280                 int longestBase = 2000; //will need to update this in driver if we find sequences with more bases.  hardcoded so we don't have the pre-read user fasta file.
281                 
282                 if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
283                 else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
284                 else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
285                 else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
286                 else {
287                         m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
288                         m->mothurOutEndLine();
289                         alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
290                 }
291                 
292                 cutoff += 0.005;
293                 
294                 for (int s = 0; s < fastaFileNames.size(); s++) {
295                         if (m->control_pressed) { outputTypes.clear(); return 0; }
296                         
297                         m->mothurOut("Processing sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
298                         
299                         if (outputDir == "") {  outputDir += m->hasPath(fastaFileNames[s]); }
300                         
301                         ifstream inFASTA;
302                         m->openInputFile(fastaFileNames[s], inFASTA);
303                         alignDB = SequenceDB(inFASTA); 
304                         inFASTA.close();
305                         
306                         int numSeqs = alignDB.getNumSeqs();
307                         int startTime = time(NULL);
308                         string outputFile = "";
309                                 
310                         if (output == "lt") { //does the user want lower triangle phylip formatted file 
311                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "phylip.dist";
312                                 remove(outputFile.c_str()); outputTypes["phylip"].push_back(outputFile);
313                         }else if (output == "column") { //user wants column format
314                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "dist";
315                                 outputTypes["column"].push_back(outputFile);
316                                 remove(outputFile.c_str());
317                         }else { //assume square
318                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "square.dist";
319                                 remove(outputFile.c_str());
320                                 outputTypes["phylip"].push_back(outputFile);
321                         }
322                         
323                         #ifdef USE_MPI
324                 
325                         int pid, start, end; 
326                         int tag = 2001;
327                                         
328                         MPI_Status status; 
329                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
330                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
331                         
332                         //each process gets where it should start and stop in the file
333                         if (output != "square") {
334                                 start = int (sqrt(float(pid)/float(processors)) * numSeqs);
335                                 end = int (sqrt(float(pid+1)/float(processors)) * numSeqs);
336                         }else{
337                                 start = int ((float(pid)/float(processors)) * numSeqs);
338                                 end = int ((float(pid+1)/float(processors)) * numSeqs);
339                         }
340                         
341                         if (output == "column") {
342                                 MPI_File outMPI;
343                                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
344
345                         char filename[1024];
346                                 strcpy(filename, outputFile.c_str());
347                                 
348                                 MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI);
349
350                                 if (pid == 0) { //you are the root process 
351                                 
352                                         //do your part
353                                         string outputMyPart;
354                                         
355                                         driverMPI(start, end, outMPI, cutoff); 
356                                         
357                                         if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI);  remove(outputFile.c_str()); delete distCalculator;  return 0; }
358                                 
359                                         //wait on chidren
360                                         for(int i = 1; i < processors; i++) { 
361                                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   remove(outputFile.c_str()); delete distCalculator;  return 0; }
362                                                 
363                                                 char buf[5];
364                                                 MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
365                                         }
366                                 }else { //you are a child process
367                                         //do your part
368                                         driverMPI(start, end, outMPI, cutoff); 
369                                         
370                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  remove(outputFile.c_str()); delete distCalculator;  return 0; }
371                                 
372                                         char buf[5];
373                                         strcpy(buf, "done"); 
374                                         //tell parent you are done.
375                                         MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
376                                 }
377                                 
378                                 MPI_File_close(&outMPI);
379                                 
380                         }else { //lower triangle format
381                                 if (pid == 0) { //you are the root process 
382                                 
383                                         //do your part
384                                         string outputMyPart;
385                                         unsigned long int mySize;
386                                         
387                                         if (output != "square"){ driverMPI(start, end, outputFile, mySize); }
388                                         else { driverMPI(start, end, outputFile, mySize, output); }
389                 
390                                         if (m->control_pressed) {  outputTypes.clear();   remove(outputFile.c_str()); delete distCalculator;  return 0; }
391                                         
392                                         int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; //
393                                         MPI_File outMPI;
394                                         MPI_File inMPI;
395
396                                         char filename[1024];
397                                         strcpy(filename, outputFile.c_str());
398
399                                         MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
400
401                                         //wait on chidren
402                                         for(int b = 1; b < processors; b++) { 
403                                                 unsigned long int fileSize;
404                                                 
405                                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  remove(outputFile.c_str());  delete distCalculator;  return 0; }
406                                                 
407                                                 MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); 
408                                                 
409                                                 string outTemp = outputFile + toString(b) + ".temp";
410
411                                                 char* buf = new char[outTemp.length()];
412                                                 memcpy(buf, outTemp.c_str(), outTemp.length());
413                                                 
414                                                 MPI_File_open(MPI_COMM_SELF, buf, MPI_MODE_DELETE_ON_CLOSE|MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);
415                                                 delete buf;
416
417                                                 int count = 0;
418                                                 while (count < fileSize) { 
419                                                         char buf2[1];
420                                                         MPI_File_read(inMPI, buf2, 1, MPI_CHAR, &status);
421                                                         MPI_File_write(outMPI, buf2, 1, MPI_CHAR, &status);
422                                                         count += 1;
423                                                 }
424                                                 
425                                                 MPI_File_close(&inMPI); //deleted on close
426                                         }
427                                         
428                                         MPI_File_close(&outMPI);
429                                 }else { //you are a child process
430                                         //do your part
431                                         unsigned long int size;
432                                         if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); }
433                                         else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); }
434                                         
435                                         if (m->control_pressed) { delete distCalculator;  return 0; }
436                                 
437                                         //tell parent you are done.
438                                         MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD);
439                                 }
440                         }
441                         MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
442         #else           
443                                         
444                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
445                         //if you don't need to fork anything
446                         if(processors == 1){
447                                 if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
448                                 else { driver(0, numSeqs, outputFile, "square");  }
449                         }else{ //you have multiple processors
450                                 
451                                 for (int i = 0; i < processors; i++) {
452                                         distlinePair tempLine;
453                                         lines.push_back(tempLine);
454                                         if (output != "square") {
455                                                 lines[i].start = int (sqrt(float(i)/float(processors)) * numSeqs);
456                                                 lines[i].end = int (sqrt(float(i+1)/float(processors)) * numSeqs);
457                                         }else{
458                                                 lines[i].start = int ((float(i)/float(processors)) * numSeqs);
459                                                 lines[i].end = int ((float(i+1)/float(processors)) * numSeqs);
460                                         }
461                                 }
462                                 
463                                 createProcesses(outputFile); 
464                         }
465                 #else
466                         //ifstream inFASTA;
467                         if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
468                         else { driver(0, numSeqs, outputFile, "square");  }
469                 #endif
470                 
471         #endif
472                         if (m->control_pressed) { outputTypes.clear();  delete distCalculator; remove(outputFile.c_str()); return 0; }
473                         
474                         #ifdef USE_MPI
475                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
476                                                 
477                                 if (pid == 0) { //only one process should output to screen
478                         #endif
479                         
480                         ifstream fileHandle;
481                         fileHandle.open(outputFile.c_str());
482                         if(fileHandle) {
483                                 m->gobble(fileHandle);
484                                 if (fileHandle.eof()) { m->mothurOut(outputFile + " is blank. This can result if there are no distances below your cutoff.");  m->mothurOutEndLine(); }
485                         }
486                         
487                         if (compress) {
488                                 m->mothurOut("Compressing..."); m->mothurOutEndLine();
489                                 m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)"); m->mothurOutEndLine();
490                                 system(("gzip -v " + outputFile).c_str());
491                                 outputNames.push_back(outputFile + ".gz");
492                         }else { outputNames.push_back(outputFile); }
493                         
494                         #ifdef USE_MPI
495                                 }
496                         #endif
497                         
498                         m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
499                         
500                         if (m->control_pressed) { outputTypes.clear();  delete distCalculator; remove(outputFile.c_str()); return 0; }
501                 }
502                         
503                 delete distCalculator;
504                 
505                 //set phylip file as new current phylipfile
506                 string current = "";
507                 itTypes = outputTypes.find("phylip");
508                 if (itTypes != outputTypes.end()) {
509                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
510                 }
511                 
512                 //set column file as new current columnfile
513                 itTypes = outputTypes.find("column");
514                 if (itTypes != outputTypes.end()) {
515                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
516                 }
517                 
518                 m->mothurOutEndLine();
519                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
520                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
521                 m->mothurOutEndLine();
522                 
523
524                 return 0;
525         }
526         catch(exception& e) {
527                 m->errorOut(e, "PairwiseSeqsCommand", "execute");
528                 exit(1);
529         }
530 }
531
532 /**************************************************************************************************/
533 void PairwiseSeqsCommand::createProcesses(string filename) {
534         try {
535 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
536                 int process = 1;
537                 processIDS.clear();
538                 
539                 //loop through and create all the processes you want
540                 while (process != processors) {
541                         int pid = fork();
542                         
543                         if (pid > 0) {
544                                 processIDS.push_back(pid); 
545                                 process++;
546                         }else if (pid == 0){
547                                 if (output != "square") {  driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", cutoff); }
548                                 else { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", "square"); }
549                                 exit(0);
550                         }else { 
551                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
552                                 for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; kill (temp, SIGINT); }
553                                 exit(0);
554                         }
555                 }
556                 
557                 //parent do my part
558                 if (output != "square") {  driver(lines[0].start, lines[0].end, filename, cutoff); }
559                 else { driver(lines[0].start, lines[0].end, filename, "square"); }
560
561                 
562                 //force parent to wait until all the processes are done
563                 for (int i=0;i<processIDS.size();i++) { 
564                         int temp = processIDS[i];
565                         wait(&temp);
566                 }
567                 
568                 //append and remove temp files
569                 for (int i=0;i<processIDS.size();i++) { 
570                         m->appendFiles((filename + toString(processIDS[i]) + ".temp"), filename);
571                         remove((filename + toString(processIDS[i]) + ".temp").c_str());
572                 }
573 #endif
574         }
575         catch(exception& e) {
576                 m->errorOut(e, "PairwiseSeqsCommand", "createProcesses");
577                 exit(1);
578         }
579 }
580
581 /**************************************************************************************************/
582 /////// need to fix to work with calcs and sequencedb
583 int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, float cutoff){
584         try {
585
586                 int startTime = time(NULL);
587                 
588                 //column file
589                 ofstream outFile(dFileName.c_str(), ios::trunc);
590                 outFile.setf(ios::fixed, ios::showpoint);
591                 outFile << setprecision(4);
592                 
593                 if((output == "lt") && startLine == 0){ outFile << alignDB.getNumSeqs() << endl;        }
594                 
595                 for(int i=startLine;i<endLine;i++){
596                         if(output == "lt")      {       
597                                 string name = alignDB.get(i).getName();
598                                 if (name.length() < 10) { //pad with spaces to make compatible
599                                         while (name.length() < 10) {  name += " ";  }
600                                 }
601                                 outFile << name << '\t';        
602                         }
603                         
604                         for(int j=0;j<i;j++){
605                                 
606                                 if (m->control_pressed) { outFile.close(); return 0;  }
607                                 
608                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
609                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
610                                 }
611                                 
612                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
613                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
614                                 }
615                                 
616                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
617                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
618                                 
619                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
620                                 seqI.setAligned(alignment->getSeqAAln());
621                                 seqJ.setAligned(alignment->getSeqBAln());
622
623                                 
624                                 distCalculator->calcDist(seqI, seqJ);
625                                 double dist = distCalculator->getDist();
626                                 
627                                 if(dist <= cutoff){
628                                         if (output == "column") { outFile << alignDB.get(i).getName() << ' ' << alignDB.get(j).getName() << ' ' << dist << endl; }
629                                 }
630                                 if (output == "lt") {  outFile << dist << '\t'; }
631                         }
632                         
633                         if (output == "lt") { outFile << endl; }
634                         
635                         if(i % 100 == 0){
636                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
637                         }
638                         
639                 }
640                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
641                 
642                 outFile.close();
643                 
644                 return 1;
645         }
646         catch(exception& e) {
647                 m->errorOut(e, "PairwiseSeqsCommand", "driver");
648                 exit(1);
649         }
650 }
651 /**************************************************************************************************/
652 /////// need to fix to work with calcs and sequencedb
653 int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, string square){
654         try {
655
656                 int startTime = time(NULL);
657                 
658                 //column file
659                 ofstream outFile(dFileName.c_str(), ios::trunc);
660                 outFile.setf(ios::fixed, ios::showpoint);
661                 outFile << setprecision(4);
662                 
663                 if(startLine == 0){     outFile << alignDB.getNumSeqs() << endl;        }
664                 
665                 for(int i=startLine;i<endLine;i++){
666                                 
667                         string name = alignDB.get(i).getName();
668                         //pad with spaces to make compatible
669                         if (name.length() < 10) { while (name.length() < 10) {  name += " ";  } }
670                                 
671                         outFile << name << '\t';        
672                         
673                         for(int j=0;j<alignDB.getNumSeqs();j++){
674                                 
675                                 if (m->control_pressed) { outFile.close(); return 0;  }
676                                 
677                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
678                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
679                                 }
680                                 
681                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
682                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
683                                 }
684                                 
685                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
686                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
687                                 
688                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
689                                 seqI.setAligned(alignment->getSeqAAln());
690                                 seqJ.setAligned(alignment->getSeqBAln());
691                                 
692                                 distCalculator->calcDist(seqI, seqJ);
693                                 double dist = distCalculator->getDist();
694                                                                 
695                                 outFile << dist << '\t'; 
696                         }
697                         
698                         outFile << endl; 
699                         
700                         if(i % 100 == 0){
701                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
702                         }
703                         
704                 }
705                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
706                 
707                 outFile.close();
708                 
709                 return 1;
710         }
711         catch(exception& e) {
712                 m->errorOut(e, "PairwiseSeqsCommand", "driver");
713                 exit(1);
714         }
715 }
716 #ifdef USE_MPI
717 /**************************************************************************************************/
718 /////// need to fix to work with calcs and sequencedb
719 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, float cutoff){
720         try {
721                 MPI_Status status;
722                 int startTime = time(NULL);
723                 
724                 string outputString = "";
725                 
726                 for(int i=startLine;i<endLine;i++){
727         
728                         for(int j=0;j<i;j++){
729                                 
730                                 if (m->control_pressed) {  return 0;  }
731                                 
732                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
733                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
734                                 }
735                                 
736                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
737                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
738                                 }
739                                 
740                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
741                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
742                                 
743                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
744                                 seqI.setAligned(alignment->getSeqAAln());
745                                 seqJ.setAligned(alignment->getSeqBAln());
746                                 
747                                 distCalculator->calcDist(seqI, seqJ);
748                                 double dist = distCalculator->getDist();
749                                 
750                                 if(dist <= cutoff){
751                                          outputString += (alignDB.get(i).getName() + ' ' + alignDB.get(j).getName() + ' ' + toString(dist) + '\n'); 
752                                 }
753                         }
754                         
755                         if(i % 100 == 0){
756                                 //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
757                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
758                         }
759                         
760                          
761                         //send results to parent
762                         int length = outputString.length();
763
764                         char* buf = new char[length];
765                         memcpy(buf, outputString.c_str(), length);
766                         
767                         MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
768                         outputString = "";
769                         delete buf;
770                         
771                 }
772                 
773                 return 1;
774         }
775         catch(exception& e) {
776                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
777                 exit(1);
778         }
779 }
780 /**************************************************************************************************/
781 /////// need to fix to work with calcs and sequencedb
782 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size){
783         try {
784                 MPI_Status status;
785                 
786                 MPI_File outMPI;
787                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
788
789                 char filename[1024];
790                 strcpy(filename, file.c_str());
791
792                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
793
794                 
795                 
796                 string outputString = "";
797                 size = 0;
798                 
799                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
800                 
801                 for(int i=startLine;i<endLine;i++){
802                                 
803                         string name = alignDB.get(i).getName();
804                         if (name.length() < 10) { //pad with spaces to make compatible
805                                 while (name.length() < 10) {  name += " ";  }
806                         }
807                         outputString += name + "\t";    
808                         
809                         for(int j=0;j<i;j++){
810                                 
811                                 if (m->control_pressed) {  return 0;  }
812                                 
813                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
814                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
815                                 }
816                                 
817                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
818                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
819                                 }
820                                 
821                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
822                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
823                                 
824                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
825                                 seqI.setAligned(alignment->getSeqAAln());
826                                 seqJ.setAligned(alignment->getSeqBAln());
827                                 
828                                 distCalculator->calcDist(seqI, seqJ);
829                                 double dist = distCalculator->getDist();
830                                 
831                                 outputString += toString(dist) + "\t"; 
832                         }
833                         
834                         outputString += "\n"; 
835                         
836                         //send results to parent
837                         int length = outputString.length();
838                         char* buf = new char[length];
839                         memcpy(buf, outputString.c_str(), length);
840                         
841                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
842                         size += outputString.length();
843                         outputString = "";
844                         delete buf;
845                 }
846                 
847                 MPI_File_close(&outMPI);
848                 
849                 return 1;
850         }
851         catch(exception& e) {
852                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
853                 exit(1);
854         }
855 }
856 /**************************************************************************************************/
857 /////// need to fix to work with calcs and sequencedb
858 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size, string square){
859         try {
860                 MPI_Status status;
861                 
862                 MPI_File outMPI;
863                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
864
865                 char filename[1024];
866                 strcpy(filename, file.c_str());
867
868                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
869                 
870                 
871                 
872                 string outputString = "";
873                 size = 0;
874                 
875                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
876                 
877                 for(int i=startLine;i<endLine;i++){
878                                 
879                         string name = alignDB.get(i).getName();
880                         if (name.length() < 10) { //pad with spaces to make compatible
881                                 while (name.length() < 10) {  name += " ";  }
882                         }
883                         outputString += name + "\t";    
884                         
885                         for(int j=0;j<alignDB.getNumSeqs();j++){
886                                 
887                                 if (m->control_pressed) {  return 0;  }
888                                 
889                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
890                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
891                                 }
892                                 
893                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
894                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
895                                 }
896                                 
897                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
898                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
899                                 
900                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
901                                 seqI.setAligned(alignment->getSeqAAln());
902                                 seqJ.setAligned(alignment->getSeqBAln());
903                                 
904                                 distCalculator->calcDist(seqI, seqJ);
905                                 double dist = distCalculator->getDist();
906                                 
907                                 outputString += toString(dist) + "\t"; 
908                         }
909                         
910                         outputString += "\n"; 
911
912                         //send results to parent
913                         int length = outputString.length();
914                         char* buf = new char[length];
915                         memcpy(buf, outputString.c_str(), length);
916                         
917                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
918                         size += outputString.length();
919                         outputString = "";
920                         delete buf;
921                 }
922                 
923                 MPI_File_close(&outMPI);
924                 
925                 return 1;
926         }
927         catch(exception& e) {
928                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
929                 exit(1);
930         }
931 }
932 #endif
933 /**************************************************************************************************/
934