]> git.donarmstrong.com Git - mothur.git/blob - pairwiseseqscommand.cpp
cluster.split fix
[mothur.git] / pairwiseseqscommand.cpp
1 /*
2  *  pairwiseseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/20/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "pairwiseseqscommand.h"
11 #include "sequence.hpp"
12
13 #include "gotohoverlap.hpp"
14 #include "needlemanoverlap.hpp"
15 #include "blastalign.hpp"
16 #include "noalign.hpp"
17 #include "nast.hpp"
18
19 #include "ignoregaps.h"
20 #include "eachgapdist.h"
21 #include "eachgapignore.h"
22 #include "onegapdist.h"
23 #include "onegapignore.h"
24
25
26 //**********************************************************************************************************************
27 vector<string> PairwiseSeqsCommand::getValidParameters(){       
28         try {
29                 string AlignArray[] =  {"fasta","align","match","mismatch","gapopen","gapextend", "processors","calc","compress","cutoff","countends","output","outputdir","inputdir"};
30                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "PairwiseSeqsCommand", "getValidParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 vector<string> PairwiseSeqsCommand::getRequiredParameters(){    
40         try {
41                 string AlignArray[] =  {"fasta"};
42                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
43                 return myArray;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "PairwiseSeqsCommand", "getRequiredParameters");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 vector<string> PairwiseSeqsCommand::getRequiredFiles(){ 
52         try {
53                 vector<string> myArray;
54                 return myArray;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "PairwiseSeqsCommand", "getRequiredFiles");
58                 exit(1);
59         }
60 }
61 //**********************************************************************************************************************
62 PairwiseSeqsCommand::PairwiseSeqsCommand(){     
63         try {
64                 abort = true;
65                 //initialize outputTypes
66                 vector<string> tempOutNames;
67                 outputTypes["phylip"] = tempOutNames;
68                 outputTypes["column"] = tempOutNames;
69         }
70         catch(exception& e) {
71                 m->errorOut(e, "PairwiseSeqsCommand", "PairwiseSeqsCommand");
72                 exit(1);
73         }
74 }
75 //**********************************************************************************************************************
76 PairwiseSeqsCommand::PairwiseSeqsCommand(string option)  {
77         try {
78                 abort = false;
79         
80                 //allow user to run help
81                 if(option == "help") { help(); abort = true; }
82                 
83                 else {
84                         
85                         //valid paramters for this command
86                         string AlignArray[] =  {"fasta","align","match","mismatch","gapopen","gapextend", "processors","cutoff","compress","calc","countends","output","outputdir","inputdir"};
87                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
88                         
89                         OptionParser parser(option);
90                         map<string, string> parameters = parser.getParameters(); 
91                         
92                         ValidParameters validParameter("pairwise.seqs");
93                         map<string, string>::iterator it;
94                         
95                         //check to make sure all parameters are valid for command
96                         for (it = parameters.begin(); it != parameters.end(); it++) { 
97                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
98                         }
99                         
100                         //initialize outputTypes
101                         vector<string> tempOutNames;
102                         outputTypes["phylip"] = tempOutNames;
103                         outputTypes["column"] = tempOutNames;
104                         
105                         //if the user changes the output directory command factory will send this info to us in the output parameter 
106                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
107                         
108
109                         //if the user changes the input directory command factory will send this info to us in the output parameter 
110                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
111                         
112                         if (inputDir == "not found"){   inputDir = "";          }
113
114                         fastaFileName = validParameter.validFile(parameters, "fasta", false);
115                         if (fastaFileName == "not found") { m->mothurOut("fasta is a required parameter for the pairwise.seqs command."); m->mothurOutEndLine(); abort = true;  }
116                         else { 
117                                 m->splitAtDash(fastaFileName, fastaFileNames);
118                                 
119                                 //go through files and make sure they are good, if not, then disregard them
120                                 for (int i = 0; i < fastaFileNames.size(); i++) {
121                                         
122                                         if (inputDir != "") {
123                                                 string path = m->hasPath(fastaFileNames[i]);
124                                                 //if the user has not given a path then, add inputdir. else leave path alone.
125                                                 if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
126                                         }
127         
128                                         int ableToOpen;
129                                         ifstream in;
130
131                                         ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
132                                 
133                                         //if you can't open it, try default location
134                                         if (ableToOpen == 1) {
135                                                 if (m->getDefaultPath() != "") { //default path is set
136                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
137                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
138                                                         ifstream in2;
139                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
140                                                         in2.close();
141                                                         fastaFileNames[i] = tryPath;
142                                                 }
143                                         }
144                                         
145                                         //if you can't open it, try output location
146                                         if (ableToOpen == 1) {
147                                                 if (m->getOutputDir() != "") { //default path is set
148                                                         string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
149                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
150                                                         ifstream in2;
151                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
152                                                         in2.close();
153                                                         fastaFileNames[i] = tryPath;
154                                                 }
155                                         }
156                                         
157                                         in.close();                                     
158
159                                         if (ableToOpen == 1) { 
160                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
161                                                 //erase from file list
162                                                 fastaFileNames.erase(fastaFileNames.begin()+i);
163                                                 i--;
164                                         }
165                                 }
166                                 
167                                 //make sure there is at least one valid file left
168                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
169                         }
170                 
171                         //check for optional parameter and set defaults
172                         // ...at some point should added some additional type checking...
173                         string temp;
174                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
175                         convert(temp, match);  
176                         
177                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
178                         convert(temp, misMatch);  
179                         
180                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
181                         convert(temp, gapOpen);  
182                         
183                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
184                         convert(temp, gapExtend); 
185                         
186                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
187                         convert(temp, processors); 
188                         
189                         temp = validParameter.validFile(parameters, "cutoff", false);           if(temp == "not found"){        temp = "1.0"; }
190                         convert(temp, cutoff); 
191                         
192                         temp = validParameter.validFile(parameters, "countends", false);        if(temp == "not found"){        temp = "T";     }
193                         countends = m->isTrue(temp); 
194                         
195                         temp = validParameter.validFile(parameters, "compress", false);         if(temp == "not found"){  temp = "F"; }
196                         compress = m->isTrue(temp); 
197                         
198                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
199                         
200                         output = validParameter.validFile(parameters, "output", false);         if(output == "not found"){      output = "column"; }
201                         if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; }
202                         
203                         calc = validParameter.validFile(parameters, "calc", false);                     
204                         if (calc == "not found") { calc = "onegap";  }
205                         else { 
206                                  if (calc == "default")  {  calc = "onegap";  }
207                         }
208                         m->splitAtDash(calc, Estimators);
209                         
210                         ValidCalculators validCalculator;
211                         if (countends) {
212                                 for (int i=0; i<Estimators.size(); i++) {
213                                         if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
214                                                 if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
215                                                 else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
216                                                 else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
217                                         }
218                                 }
219                         }else {
220                                 for (int i=0; i<Estimators.size(); i++) {
221                                         if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
222                                                 if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
223                                                 else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
224                                                 else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
225                                         }
226                                 }
227                         }
228                 }
229                 
230         }
231         catch(exception& e) {
232                 m->errorOut(e, "PairwiseSeqsCommand", "PairwiseSeqsCommand");
233                 exit(1);
234         }
235 }
236 //**********************************************************************************************************************
237 PairwiseSeqsCommand::~PairwiseSeqsCommand(){}
238 //**********************************************************************************************************************
239
240 void PairwiseSeqsCommand::help(){
241         try {
242                 m->mothurOut("The pairwise.seqs command reads a fasta file and creates distance matrix.\n");
243                 m->mothurOut("The pairwise.seqs command parameters are fasta, align, match, mismatch, gapopen, gapextend, calc, output, cutoff and processors.\n");
244                 m->mothurOut("The fasta parameter is required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n");
245                 m->mothurOut("The align parameter allows you to specify the alignment method to use.  Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n");
246                 m->mothurOut("The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n");
247                 m->mothurOut("The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n");
248                 m->mothurOut("The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n");
249                 m->mothurOut("The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n");
250                 m->mothurOut("The calc parameter allows you to specify the method of calculating the distances.  Your options are: nogaps, onegap or eachgap. The default is onegap.\n");
251                 m->mothurOut("The countends parameter allows you to specify whether to include terminal gaps in distance.  Your options are: T or F. The default is T.\n");
252                 m->mothurOut("The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0.\n");
253                 m->mothurOut("The output parameter allows you to specify format of your distance matrix. Options are column, lt, and square. The default is column.\n");
254                 m->mothurOut("The compress parameter allows you to indicate that you want the resulting distance file compressed.  The default is false.\n");
255                 m->mothurOut("The pairwise.seqs command should be in the following format: \n");
256                 m->mothurOut("pairwise.seqs(fasta=yourfastaFile, align=yourAlignmentMethod) \n");
257                 m->mothurOut("Example pairwise.seqs(fasta=candidate.fasta, align=blast)\n");
258                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
259         }
260         catch(exception& e) {
261                 m->errorOut(e, "PairwiseSeqsCommand", "help");
262                 exit(1);
263         }
264 }
265
266
267 //**********************************************************************************************************************
268
269 int PairwiseSeqsCommand::execute(){
270         try {
271                 if (abort == true) {    return 0;       }
272                 
273                 int longestBase = 2000; //will need to update this in driver if we find sequences with more bases.  hardcoded so we don't have the pre-read user fasta file.
274                 
275                 if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
276                 else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
277                 else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
278                 else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
279                 else {
280                         m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
281                         m->mothurOutEndLine();
282                         alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
283                 }
284                 
285                 cutoff += 0.005;
286                 
287                 for (int s = 0; s < fastaFileNames.size(); s++) {
288                         if (m->control_pressed) { outputTypes.clear(); return 0; }
289                         
290                         m->mothurOut("Processing sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
291                         
292                         if (outputDir == "") {  outputDir += m->hasPath(fastaFileNames[s]); }
293                         
294                         ifstream inFASTA;
295                         m->openInputFile(fastaFileNames[s], inFASTA);
296                         alignDB = SequenceDB(inFASTA); 
297                         inFASTA.close();
298                         
299                         int numSeqs = alignDB.getNumSeqs();
300                         int startTime = time(NULL);
301                         string outputFile = "";
302                                 
303                         if (output == "lt") { //does the user want lower triangle phylip formatted file 
304                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "phylip.dist";
305                                 remove(outputFile.c_str()); outputTypes["phylip"].push_back(outputFile);
306                         }else if (output == "column") { //user wants column format
307                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "dist";
308                                 outputTypes["column"].push_back(outputFile);
309                                 remove(outputFile.c_str());
310                         }else { //assume square
311                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "square.dist";
312                                 remove(outputFile.c_str());
313                                 outputTypes["phylip"].push_back(outputFile);
314                         }
315                         
316                         #ifdef USE_MPI
317                 
318                         int pid, start, end; 
319                         int tag = 2001;
320                                         
321                         MPI_Status status; 
322                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
323                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
324                         
325                         //each process gets where it should start and stop in the file
326                         if (output != "square") {
327                                 start = int (sqrt(float(pid)/float(processors)) * numSeqs);
328                                 end = int (sqrt(float(pid+1)/float(processors)) * numSeqs);
329                         }else{
330                                 start = int ((float(pid)/float(processors)) * numSeqs);
331                                 end = int ((float(pid+1)/float(processors)) * numSeqs);
332                         }
333                         
334                         if (output == "column") {
335                                 MPI_File outMPI;
336                                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
337
338                         char filename[1024];
339                                 strcpy(filename, outputFile.c_str());
340                                 
341                                 MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI);
342
343                                 if (pid == 0) { //you are the root process 
344                                 
345                                         //do your part
346                                         string outputMyPart;
347                                         
348                                         driverMPI(start, end, outMPI, cutoff); 
349                                         
350                                         if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI);  remove(outputFile.c_str()); delete distCalculator;  return 0; }
351                                 
352                                         //wait on chidren
353                                         for(int i = 1; i < processors; i++) { 
354                                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   remove(outputFile.c_str()); delete distCalculator;  return 0; }
355                                                 
356                                                 char buf[5];
357                                                 MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
358                                         }
359                                 }else { //you are a child process
360                                         //do your part
361                                         driverMPI(start, end, outMPI, cutoff); 
362                                         
363                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  remove(outputFile.c_str()); delete distCalculator;  return 0; }
364                                 
365                                         char buf[5];
366                                         strcpy(buf, "done"); 
367                                         //tell parent you are done.
368                                         MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
369                                 }
370                                 
371                                 MPI_File_close(&outMPI);
372                                 
373                         }else { //lower triangle format
374                                 if (pid == 0) { //you are the root process 
375                                 
376                                         //do your part
377                                         string outputMyPart;
378                                         unsigned long int mySize;
379                                         
380                                         if (output != "square"){ driverMPI(start, end, outputFile, mySize); }
381                                         else { driverMPI(start, end, outputFile, mySize, output); }
382                 
383                                         if (m->control_pressed) {  outputTypes.clear();   remove(outputFile.c_str()); delete distCalculator;  return 0; }
384                                         
385                                         int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; //
386                                         MPI_File outMPI;
387                                         MPI_File inMPI;
388
389                                         char filename[1024];
390                                         strcpy(filename, outputFile.c_str());
391
392                                         MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
393
394                                         //wait on chidren
395                                         for(int b = 1; b < processors; b++) { 
396                                                 unsigned long int fileSize;
397                                                 
398                                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  remove(outputFile.c_str());  delete distCalculator;  return 0; }
399                                                 
400                                                 MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); 
401                                                 
402                                                 string outTemp = outputFile + toString(b) + ".temp";
403
404                                                 char* buf = new char[outTemp.length()];
405                                                 memcpy(buf, outTemp.c_str(), outTemp.length());
406                                                 
407                                                 MPI_File_open(MPI_COMM_SELF, buf, MPI_MODE_DELETE_ON_CLOSE|MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);
408                                                 delete buf;
409
410                                                 int count = 0;
411                                                 while (count < fileSize) { 
412                                                         char buf2[1];
413                                                         MPI_File_read(inMPI, buf2, 1, MPI_CHAR, &status);
414                                                         MPI_File_write(outMPI, buf2, 1, MPI_CHAR, &status);
415                                                         count += 1;
416                                                 }
417                                                 
418                                                 MPI_File_close(&inMPI); //deleted on close
419                                         }
420                                         
421                                         MPI_File_close(&outMPI);
422                                 }else { //you are a child process
423                                         //do your part
424                                         unsigned long int size;
425                                         if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); }
426                                         else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); }
427                                         
428                                         if (m->control_pressed) { delete distCalculator;  return 0; }
429                                 
430                                         //tell parent you are done.
431                                         MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD);
432                                 }
433                         }
434                         MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
435         #else           
436                                         
437                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
438                         //if you don't need to fork anything
439                         if(processors == 1){
440                                 if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
441                                 else { driver(0, numSeqs, outputFile, "square");  }
442                         }else{ //you have multiple processors
443                                 
444                                 for (int i = 0; i < processors; i++) {
445                                         lines.push_back(new linePair());
446                                         if (output != "square") {
447                                                 lines[i]->start = int (sqrt(float(i)/float(processors)) * numSeqs);
448                                                 lines[i]->end = int (sqrt(float(i+1)/float(processors)) * numSeqs);
449                                         }else{
450                                                 lines[i]->start = int ((float(i)/float(processors)) * numSeqs);
451                                                 lines[i]->end = int ((float(i+1)/float(processors)) * numSeqs);
452                                         }
453                                 }
454
455                                 createProcesses(outputFile); 
456                         
457                                 map<int, int>::iterator it = processIDS.begin();
458                                 rename((outputFile + toString(it->second) + ".temp").c_str(), outputFile.c_str());
459                                 it++;
460                                 
461                                 //append and remove temp files
462                                 for (; it != processIDS.end(); it++) {
463                                         m->appendFiles((outputFile + toString(it->second) + ".temp"), outputFile);
464                                         remove((outputFile + toString(it->second) + ".temp").c_str());
465                                 }
466                         }
467                 #else
468                         //ifstream inFASTA;
469                         if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
470                         else { driver(0, numSeqs, outputFile, "square");  }
471                 #endif
472                 
473         #endif
474                         if (m->control_pressed) { outputTypes.clear();  delete distCalculator; remove(outputFile.c_str()); return 0; }
475                         
476                         #ifdef USE_MPI
477                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
478                                                 
479                                 if (pid == 0) { //only one process should output to screen
480                         #endif
481                         
482                         ifstream fileHandle;
483                         fileHandle.open(outputFile.c_str());
484                         if(fileHandle) {
485                                 m->gobble(fileHandle);
486                                 if (fileHandle.eof()) { m->mothurOut(outputFile + " is blank. This can result if there are no distances below your cutoff.");  m->mothurOutEndLine(); }
487                         }
488                         
489                         if (compress) {
490                                 m->mothurOut("Compressing..."); m->mothurOutEndLine();
491                                 m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)"); m->mothurOutEndLine();
492                                 system(("gzip -v " + outputFile).c_str());
493                                 outputNames.push_back(outputFile + ".gz");
494                         }else { outputNames.push_back(outputFile); }
495                         
496                         #ifdef USE_MPI
497                                 }
498                         #endif
499                         
500                         m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
501                         
502                         if (m->control_pressed) { outputTypes.clear();  delete distCalculator; remove(outputFile.c_str()); return 0; }
503                 }
504                         
505                 delete distCalculator;
506                 
507                 m->mothurOutEndLine();
508                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
509                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
510                 m->mothurOutEndLine();
511                 
512
513                 return 0;
514         }
515         catch(exception& e) {
516                 m->errorOut(e, "PairwiseSeqsCommand", "execute");
517                 exit(1);
518         }
519 }
520
521 /**************************************************************************************************/
522 void PairwiseSeqsCommand::createProcesses(string filename) {
523         try {
524 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
525                 int process = 0;
526                 processIDS.clear();
527                 
528                 //loop through and create all the processes you want
529                 while (process != processors) {
530                         int pid = fork();
531                         
532                         if (pid > 0) {
533                                 processIDS[lines[process]->end] = pid;  //create map from line number to pid so you can append files in correct order later
534                                 process++;
535                         }else if (pid == 0){
536                                 if (output != "square") {  driver(lines[process]->start, lines[process]->end, filename + toString(getpid()) + ".temp", cutoff); }
537                                 else { driver(lines[process]->start, lines[process]->end, filename + toString(getpid()) + ".temp", "square"); }
538                                 exit(0);
539                         }else { 
540                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
541                                 for (map<int, int>::iterator it = processIDS.begin(); it != processIDS.end(); it++) { int temp = it->second; kill (temp, SIGINT); }
542                                 exit(0);
543                         }
544                 }
545         
546                 //force parent to wait until all the processes are done
547                 for (map<int, int>::iterator it = processIDS.begin(); it != processIDS.end(); it++) { 
548                         int temp = it->second;
549                         wait(&temp);
550                 }
551 #endif
552         }
553         catch(exception& e) {
554                 m->errorOut(e, "PairwiseSeqsCommand", "createProcesses");
555                 exit(1);
556         }
557 }
558
559 /**************************************************************************************************/
560 /////// need to fix to work with calcs and sequencedb
561 int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, float cutoff){
562         try {
563
564                 int startTime = time(NULL);
565                 
566                 //column file
567                 ofstream outFile(dFileName.c_str(), ios::trunc);
568                 outFile.setf(ios::fixed, ios::showpoint);
569                 outFile << setprecision(4);
570                 
571                 if((output == "lt") && startLine == 0){ outFile << alignDB.getNumSeqs() << endl;        }
572                 
573                 for(int i=startLine;i<endLine;i++){
574                         if(output == "lt")      {       
575                                 string name = alignDB.get(i).getName();
576                                 if (name.length() < 10) { //pad with spaces to make compatible
577                                         while (name.length() < 10) {  name += " ";  }
578                                 }
579                                 outFile << name << '\t';        
580                         }
581                         
582                         for(int j=0;j<i;j++){
583                                 
584                                 if (m->control_pressed) { outFile.close(); return 0;  }
585                                 
586                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
587                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
588                                 }
589                                 
590                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
591                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
592                                 }
593                                 
594                                 Sequence* seqI = new Sequence(alignDB.get(i).getName(), alignDB.get(i).getAligned());
595                                 Sequence* seqJ = new Sequence(alignDB.get(j).getName(), alignDB.get(j).getAligned());
596                                 
597                                 Nast(alignment, seqI, seqJ);
598                                 
599                                 distCalculator->calcDist(*seqI, *seqJ);
600                                 double dist = distCalculator->getDist();
601                                 
602                                 delete seqI; delete seqJ;
603                                 
604                                 if(dist <= cutoff){
605                                         if (output == "column") { outFile << alignDB.get(i).getName() << ' ' << alignDB.get(j).getName() << ' ' << dist << endl; }
606                                 }
607                                 if (output == "lt") {  outFile << dist << '\t'; }
608                         }
609                         
610                         if (output == "lt") { outFile << endl; }
611                         
612                         if(i % 100 == 0){
613                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
614                         }
615                         
616                 }
617                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
618                 
619                 outFile.close();
620                 
621                 return 1;
622         }
623         catch(exception& e) {
624                 m->errorOut(e, "PairwiseSeqsCommand", "driver");
625                 exit(1);
626         }
627 }
628 /**************************************************************************************************/
629 /////// need to fix to work with calcs and sequencedb
630 int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, string square){
631         try {
632
633                 int startTime = time(NULL);
634                 
635                 //column file
636                 ofstream outFile(dFileName.c_str(), ios::trunc);
637                 outFile.setf(ios::fixed, ios::showpoint);
638                 outFile << setprecision(4);
639                 
640                 if(startLine == 0){     outFile << alignDB.getNumSeqs() << endl;        }
641                 
642                 for(int i=startLine;i<endLine;i++){
643                                 
644                         string name = alignDB.get(i).getName();
645                         //pad with spaces to make compatible
646                         if (name.length() < 10) { while (name.length() < 10) {  name += " ";  } }
647                                 
648                         outFile << name << '\t';        
649                         
650                         for(int j=0;j<alignDB.getNumSeqs();j++){
651                                 
652                                 if (m->control_pressed) { outFile.close(); return 0;  }
653                                 
654                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
655                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
656                                 }
657                                 
658                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
659                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
660                                 }
661                                 
662                                 Sequence* seqI = new Sequence(alignDB.get(i).getName(), alignDB.get(i).getAligned());
663                                 Sequence* seqJ = new Sequence(alignDB.get(j).getName(), alignDB.get(j).getAligned());
664                                 
665                                 Nast(alignment, seqI, seqJ);
666                                 
667                                 distCalculator->calcDist(*seqI, *seqJ);
668                                 double dist = distCalculator->getDist();
669                                 
670                                 delete seqI; delete seqJ;
671                                                                 
672                                 outFile << dist << '\t'; 
673                         }
674                         
675                         outFile << endl; 
676                         
677                         if(i % 100 == 0){
678                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
679                         }
680                         
681                 }
682                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
683                 
684                 outFile.close();
685                 
686                 return 1;
687         }
688         catch(exception& e) {
689                 m->errorOut(e, "PairwiseSeqsCommand", "driver");
690                 exit(1);
691         }
692 }
693 #ifdef USE_MPI
694 /**************************************************************************************************/
695 /////// need to fix to work with calcs and sequencedb
696 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, float cutoff){
697         try {
698                 MPI_Status status;
699                 int startTime = time(NULL);
700                 
701                 string outputString = "";
702                 
703                 for(int i=startLine;i<endLine;i++){
704         
705                         for(int j=0;j<i;j++){
706                                 
707                                 if (m->control_pressed) {  return 0;  }
708                                 
709                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
710                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
711                                 }
712                                 
713                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
714                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
715                                 }
716                                 
717                                 Sequence* seqI = new Sequence(alignDB.get(i).getName(), alignDB.get(i).getAligned());
718                                 Sequence* seqJ = new Sequence(alignDB.get(j).getName(), alignDB.get(j).getAligned());
719                                 
720                                 Nast(alignment, seqI, seqJ);
721                                 
722                                 distCalculator->calcDist(*seqI, *seqJ);
723                                 double dist = distCalculator->getDist();
724                                 
725                                 delete seqI; delete seqJ;
726                                 
727                                 if(dist <= cutoff){
728                                          outputString += (alignDB.get(i).getName() + ' ' + alignDB.get(j).getName() + ' ' + toString(dist) + '\n'); 
729                                 }
730                         }
731                         
732                         if(i % 100 == 0){
733                                 //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
734                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
735                         }
736                         
737                          
738                         //send results to parent
739                         int length = outputString.length();
740
741                         char* buf = new char[length];
742                         memcpy(buf, outputString.c_str(), length);
743                         
744                         MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
745                         outputString = "";
746                         delete buf;
747                         
748                 }
749                 
750                 return 1;
751         }
752         catch(exception& e) {
753                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
754                 exit(1);
755         }
756 }
757 /**************************************************************************************************/
758 /////// need to fix to work with calcs and sequencedb
759 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size){
760         try {
761                 MPI_Status status;
762                 
763                 MPI_File outMPI;
764                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
765
766                 char filename[1024];
767                 strcpy(filename, file.c_str());
768
769                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
770
771                 int startTime = time(NULL);
772                 
773                 string outputString = "";
774                 size = 0;
775                 
776                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
777                 
778                 for(int i=startLine;i<endLine;i++){
779                                 
780                         string name = alignDB.get(i).getName();
781                         if (name.length() < 10) { //pad with spaces to make compatible
782                                 while (name.length() < 10) {  name += " ";  }
783                         }
784                         outputString += name + "\t";    
785                         
786                         for(int j=0;j<i;j++){
787                                 
788                                 if (m->control_pressed) {  return 0;  }
789                                 
790                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
791                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
792                                 }
793                                 
794                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
795                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
796                                 }
797                                 
798                                 Sequence* seqI = new Sequence(alignDB.get(i).getName(), alignDB.get(i).getAligned());
799                                 Sequence* seqJ = new Sequence(alignDB.get(j).getName(), alignDB.get(j).getAligned());
800                                 
801                                 Nast(alignment, seqI, seqJ);
802                                 
803                                 distCalculator->calcDist(*seqI, *seqJ);
804                                 double dist = distCalculator->getDist();
805                                 
806                                 delete seqI; delete seqJ;
807                                 
808                                 outputString += toString(dist) + "\t"; 
809                         }
810                         
811                         outputString += "\n"; 
812                         
813                         //send results to parent
814                         int length = outputString.length();
815                         char* buf = new char[length];
816                         memcpy(buf, outputString.c_str(), length);
817                         
818                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
819                         size += outputString.length();
820                         outputString = "";
821                         delete buf;
822                 }
823                 
824                 MPI_File_close(&outMPI);
825                 
826                 return 1;
827         }
828         catch(exception& e) {
829                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
830                 exit(1);
831         }
832 }
833 /**************************************************************************************************/
834 /////// need to fix to work with calcs and sequencedb
835 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size, string square){
836         try {
837                 MPI_Status status;
838                 
839                 MPI_File outMPI;
840                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
841
842                 char filename[1024];
843                 strcpy(filename, file.c_str());
844
845                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
846                 
847                 int startTime = time(NULL);
848                 
849                 string outputString = "";
850                 size = 0;
851                 
852                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
853                 
854                 for(int i=startLine;i<endLine;i++){
855                                 
856                         string name = alignDB.get(i).getName();
857                         if (name.length() < 10) { //pad with spaces to make compatible
858                                 while (name.length() < 10) {  name += " ";  }
859                         }
860                         outputString += name + "\t";    
861                         
862                         for(int j=0;j<alignDB.getNumSeqs();j++){
863                                 
864                                 if (m->control_pressed) {  return 0;  }
865                                 
866                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
867                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
868                                 }
869                                 
870                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
871                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
872                                 }
873                                 
874                                 Sequence* seqI = new Sequence(alignDB.get(i).getName(), alignDB.get(i).getAligned());
875                                 Sequence* seqJ = new Sequence(alignDB.get(j).getName(), alignDB.get(j).getAligned());
876                                 
877                                 Nast(alignment, seqI, seqJ);
878                                 
879                                 distCalculator->calcDist(*seqI, *seqJ);
880                                 double dist = distCalculator->getDist();
881                                 
882                                 delete seqI; delete seqJ;
883                                 
884                                 outputString += toString(dist) + "\t"; 
885                         }
886                         
887                         outputString += "\n"; 
888
889                         //send results to parent
890                         int length = outputString.length();
891                         char* buf = new char[length];
892                         memcpy(buf, outputString.c_str(), length);
893                         
894                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
895                         size += outputString.length();
896                         outputString = "";
897                         delete buf;
898                 }
899                 
900                 MPI_File_close(&outMPI);
901                 
902                 return 1;
903         }
904         catch(exception& e) {
905                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
906                 exit(1);
907         }
908 }
909 #endif
910 /**************************************************************************************************/
911