]> git.donarmstrong.com Git - mothur.git/blob - pairwiseseqscommand.cpp
added [ERROR] flag if command aborts
[mothur.git] / pairwiseseqscommand.cpp
1 /*
2  *  pairwiseseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/20/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "pairwiseseqscommand.h"
11 #include "sequence.hpp"
12
13 #include "gotohoverlap.hpp"
14 #include "needlemanoverlap.hpp"
15 #include "blastalign.hpp"
16 #include "noalign.hpp"
17 #include "nast.hpp"
18
19 #include "ignoregaps.h"
20 #include "eachgapdist.h"
21 #include "eachgapignore.h"
22 #include "onegapdist.h"
23 #include "onegapignore.h"
24
25
26 //**********************************************************************************************************************
27 vector<string> PairwiseSeqsCommand::getValidParameters(){       
28         try {
29                 string AlignArray[] =  {"fasta","align","match","mismatch","gapopen","gapextend", "processors","calc","compress","cutoff","countends","output","outputdir","inputdir"};
30                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "PairwiseSeqsCommand", "getValidParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 vector<string> PairwiseSeqsCommand::getRequiredParameters(){    
40         try {
41                 string AlignArray[] =  {"fasta"};
42                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
43                 return myArray;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "PairwiseSeqsCommand", "getRequiredParameters");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 vector<string> PairwiseSeqsCommand::getRequiredFiles(){ 
52         try {
53                 vector<string> myArray;
54                 return myArray;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "PairwiseSeqsCommand", "getRequiredFiles");
58                 exit(1);
59         }
60 }
61 //**********************************************************************************************************************
62 PairwiseSeqsCommand::PairwiseSeqsCommand(){     
63         try {
64                 abort = true; calledHelp = true; 
65                 vector<string> tempOutNames;
66                 outputTypes["phylip"] = tempOutNames;
67                 outputTypes["column"] = tempOutNames;
68         }
69         catch(exception& e) {
70                 m->errorOut(e, "PairwiseSeqsCommand", "PairwiseSeqsCommand");
71                 exit(1);
72         }
73 }
74 //**********************************************************************************************************************
75 PairwiseSeqsCommand::PairwiseSeqsCommand(string option)  {
76         try {
77                 abort = false; calledHelp = false;   
78         
79                 //allow user to run help
80                 if(option == "help") { help(); abort = true; calledHelp = true; }
81                 
82                 else {
83                         
84                         //valid paramters for this command
85                         string AlignArray[] =  {"fasta","align","match","mismatch","gapopen","gapextend", "processors","cutoff","compress","calc","countends","output","outputdir","inputdir"};
86                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
87                         
88                         OptionParser parser(option);
89                         map<string, string> parameters = parser.getParameters(); 
90                         
91                         ValidParameters validParameter("pairwise.seqs");
92                         map<string, string>::iterator it;
93                         
94                         //check to make sure all parameters are valid for command
95                         for (it = parameters.begin(); it != parameters.end(); it++) { 
96                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
97                         }
98                         
99                         //initialize outputTypes
100                         vector<string> tempOutNames;
101                         outputTypes["phylip"] = tempOutNames;
102                         outputTypes["column"] = tempOutNames;
103                         
104                         //if the user changes the output directory command factory will send this info to us in the output parameter 
105                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
106                         
107
108                         //if the user changes the input directory command factory will send this info to us in the output parameter 
109                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
110                         
111                         if (inputDir == "not found"){   inputDir = "";          }
112
113                         fastaFileName = validParameter.validFile(parameters, "fasta", false);
114                         if (fastaFileName == "not found") { m->mothurOut("fasta is a required parameter for the pairwise.seqs command."); m->mothurOutEndLine(); abort = true;  }
115                         else { 
116                                 m->splitAtDash(fastaFileName, fastaFileNames);
117                                 
118                                 //go through files and make sure they are good, if not, then disregard them
119                                 for (int i = 0; i < fastaFileNames.size(); i++) {
120                                         
121                                         if (inputDir != "") {
122                                                 string path = m->hasPath(fastaFileNames[i]);
123                                                 //if the user has not given a path then, add inputdir. else leave path alone.
124                                                 if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
125                                         }
126         
127                                         int ableToOpen;
128                                         ifstream in;
129
130                                         ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
131                                 
132                                         //if you can't open it, try default location
133                                         if (ableToOpen == 1) {
134                                                 if (m->getDefaultPath() != "") { //default path is set
135                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
136                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
137                                                         ifstream in2;
138                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
139                                                         in2.close();
140                                                         fastaFileNames[i] = tryPath;
141                                                 }
142                                         }
143                                         
144                                         //if you can't open it, try output location
145                                         if (ableToOpen == 1) {
146                                                 if (m->getOutputDir() != "") { //default path is set
147                                                         string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
148                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
149                                                         ifstream in2;
150                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
151                                                         in2.close();
152                                                         fastaFileNames[i] = tryPath;
153                                                 }
154                                         }
155                                         
156                                         in.close();                                     
157
158                                         if (ableToOpen == 1) { 
159                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
160                                                 //erase from file list
161                                                 fastaFileNames.erase(fastaFileNames.begin()+i);
162                                                 i--;
163                                         }
164                                 }
165                                 
166                                 //make sure there is at least one valid file left
167                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
168                         }
169                 
170                         //check for optional parameter and set defaults
171                         // ...at some point should added some additional type checking...
172                         string temp;
173                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
174                         convert(temp, match);  
175                         
176                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
177                         convert(temp, misMatch);  
178                         
179                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
180                         convert(temp, gapOpen);  
181                         
182                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
183                         convert(temp, gapExtend); 
184                         
185                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
186                         convert(temp, processors); 
187                         
188                         temp = validParameter.validFile(parameters, "cutoff", false);           if(temp == "not found"){        temp = "1.0"; }
189                         convert(temp, cutoff); 
190                         
191                         temp = validParameter.validFile(parameters, "countends", false);        if(temp == "not found"){        temp = "T";     }
192                         countends = m->isTrue(temp); 
193                         
194                         temp = validParameter.validFile(parameters, "compress", false);         if(temp == "not found"){  temp = "F"; }
195                         compress = m->isTrue(temp); 
196                         
197                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
198                         
199                         output = validParameter.validFile(parameters, "output", false);         if(output == "not found"){      output = "column"; }
200                         if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; }
201                         
202                         calc = validParameter.validFile(parameters, "calc", false);                     
203                         if (calc == "not found") { calc = "onegap";  }
204                         else { 
205                                  if (calc == "default")  {  calc = "onegap";  }
206                         }
207                         m->splitAtDash(calc, Estimators);
208                         
209                         ValidCalculators validCalculator;
210                         if (countends) {
211                                 for (int i=0; i<Estimators.size(); i++) {
212                                         if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
213                                                 if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
214                                                 else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
215                                                 else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
216                                         }
217                                 }
218                         }else {
219                                 for (int i=0; i<Estimators.size(); i++) {
220                                         if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
221                                                 if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
222                                                 else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
223                                                 else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
224                                         }
225                                 }
226                         }
227                 }
228                 
229         }
230         catch(exception& e) {
231                 m->errorOut(e, "PairwiseSeqsCommand", "PairwiseSeqsCommand");
232                 exit(1);
233         }
234 }
235 //**********************************************************************************************************************
236 PairwiseSeqsCommand::~PairwiseSeqsCommand(){}
237 //**********************************************************************************************************************
238
239 void PairwiseSeqsCommand::help(){
240         try {
241                 m->mothurOut("The pairwise.seqs command reads a fasta file and creates distance matrix.\n");
242                 m->mothurOut("The pairwise.seqs command parameters are fasta, align, match, mismatch, gapopen, gapextend, calc, output, cutoff and processors.\n");
243                 m->mothurOut("The fasta parameter is required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n");
244                 m->mothurOut("The align parameter allows you to specify the alignment method to use.  Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n");
245                 m->mothurOut("The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n");
246                 m->mothurOut("The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n");
247                 m->mothurOut("The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n");
248                 m->mothurOut("The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n");
249                 m->mothurOut("The calc parameter allows you to specify the method of calculating the distances.  Your options are: nogaps, onegap or eachgap. The default is onegap.\n");
250                 m->mothurOut("The countends parameter allows you to specify whether to include terminal gaps in distance.  Your options are: T or F. The default is T.\n");
251                 m->mothurOut("The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0.\n");
252                 m->mothurOut("The output parameter allows you to specify format of your distance matrix. Options are column, lt, and square. The default is column.\n");
253                 m->mothurOut("The compress parameter allows you to indicate that you want the resulting distance file compressed.  The default is false.\n");
254                 m->mothurOut("The pairwise.seqs command should be in the following format: \n");
255                 m->mothurOut("pairwise.seqs(fasta=yourfastaFile, align=yourAlignmentMethod) \n");
256                 m->mothurOut("Example pairwise.seqs(fasta=candidate.fasta, align=blast)\n");
257                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
258         }
259         catch(exception& e) {
260                 m->errorOut(e, "PairwiseSeqsCommand", "help");
261                 exit(1);
262         }
263 }
264
265
266 //**********************************************************************************************************************
267
268 int PairwiseSeqsCommand::execute(){
269         try {
270                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
271                 
272                 int longestBase = 2000; //will need to update this in driver if we find sequences with more bases.  hardcoded so we don't have the pre-read user fasta file.
273                 
274                 if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
275                 else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
276                 else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
277                 else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
278                 else {
279                         m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
280                         m->mothurOutEndLine();
281                         alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
282                 }
283                 
284                 cutoff += 0.005;
285                 
286                 for (int s = 0; s < fastaFileNames.size(); s++) {
287                         if (m->control_pressed) { outputTypes.clear(); return 0; }
288                         
289                         m->mothurOut("Processing sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
290                         
291                         if (outputDir == "") {  outputDir += m->hasPath(fastaFileNames[s]); }
292                         
293                         ifstream inFASTA;
294                         m->openInputFile(fastaFileNames[s], inFASTA);
295                         alignDB = SequenceDB(inFASTA); 
296                         inFASTA.close();
297                         
298                         int numSeqs = alignDB.getNumSeqs();
299                         int startTime = time(NULL);
300                         string outputFile = "";
301                                 
302                         if (output == "lt") { //does the user want lower triangle phylip formatted file 
303                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "phylip.dist";
304                                 remove(outputFile.c_str()); outputTypes["phylip"].push_back(outputFile);
305                         }else if (output == "column") { //user wants column format
306                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "dist";
307                                 outputTypes["column"].push_back(outputFile);
308                                 remove(outputFile.c_str());
309                         }else { //assume square
310                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "square.dist";
311                                 remove(outputFile.c_str());
312                                 outputTypes["phylip"].push_back(outputFile);
313                         }
314                         
315                         #ifdef USE_MPI
316                 
317                         int pid, start, end; 
318                         int tag = 2001;
319                                         
320                         MPI_Status status; 
321                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
322                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
323                         
324                         //each process gets where it should start and stop in the file
325                         if (output != "square") {
326                                 start = int (sqrt(float(pid)/float(processors)) * numSeqs);
327                                 end = int (sqrt(float(pid+1)/float(processors)) * numSeqs);
328                         }else{
329                                 start = int ((float(pid)/float(processors)) * numSeqs);
330                                 end = int ((float(pid+1)/float(processors)) * numSeqs);
331                         }
332                         
333                         if (output == "column") {
334                                 MPI_File outMPI;
335                                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
336
337                         char filename[1024];
338                                 strcpy(filename, outputFile.c_str());
339                                 
340                                 MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI);
341
342                                 if (pid == 0) { //you are the root process 
343                                 
344                                         //do your part
345                                         string outputMyPart;
346                                         
347                                         driverMPI(start, end, outMPI, cutoff); 
348                                         
349                                         if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI);  remove(outputFile.c_str()); delete distCalculator;  return 0; }
350                                 
351                                         //wait on chidren
352                                         for(int i = 1; i < processors; i++) { 
353                                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   remove(outputFile.c_str()); delete distCalculator;  return 0; }
354                                                 
355                                                 char buf[5];
356                                                 MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
357                                         }
358                                 }else { //you are a child process
359                                         //do your part
360                                         driverMPI(start, end, outMPI, cutoff); 
361                                         
362                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  remove(outputFile.c_str()); delete distCalculator;  return 0; }
363                                 
364                                         char buf[5];
365                                         strcpy(buf, "done"); 
366                                         //tell parent you are done.
367                                         MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
368                                 }
369                                 
370                                 MPI_File_close(&outMPI);
371                                 
372                         }else { //lower triangle format
373                                 if (pid == 0) { //you are the root process 
374                                 
375                                         //do your part
376                                         string outputMyPart;
377                                         unsigned long int mySize;
378                                         
379                                         if (output != "square"){ driverMPI(start, end, outputFile, mySize); }
380                                         else { driverMPI(start, end, outputFile, mySize, output); }
381                 
382                                         if (m->control_pressed) {  outputTypes.clear();   remove(outputFile.c_str()); delete distCalculator;  return 0; }
383                                         
384                                         int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; //
385                                         MPI_File outMPI;
386                                         MPI_File inMPI;
387
388                                         char filename[1024];
389                                         strcpy(filename, outputFile.c_str());
390
391                                         MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
392
393                                         //wait on chidren
394                                         for(int b = 1; b < processors; b++) { 
395                                                 unsigned long int fileSize;
396                                                 
397                                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  remove(outputFile.c_str());  delete distCalculator;  return 0; }
398                                                 
399                                                 MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); 
400                                                 
401                                                 string outTemp = outputFile + toString(b) + ".temp";
402
403                                                 char* buf = new char[outTemp.length()];
404                                                 memcpy(buf, outTemp.c_str(), outTemp.length());
405                                                 
406                                                 MPI_File_open(MPI_COMM_SELF, buf, MPI_MODE_DELETE_ON_CLOSE|MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);
407                                                 delete buf;
408
409                                                 int count = 0;
410                                                 while (count < fileSize) { 
411                                                         char buf2[1];
412                                                         MPI_File_read(inMPI, buf2, 1, MPI_CHAR, &status);
413                                                         MPI_File_write(outMPI, buf2, 1, MPI_CHAR, &status);
414                                                         count += 1;
415                                                 }
416                                                 
417                                                 MPI_File_close(&inMPI); //deleted on close
418                                         }
419                                         
420                                         MPI_File_close(&outMPI);
421                                 }else { //you are a child process
422                                         //do your part
423                                         unsigned long int size;
424                                         if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); }
425                                         else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); }
426                                         
427                                         if (m->control_pressed) { delete distCalculator;  return 0; }
428                                 
429                                         //tell parent you are done.
430                                         MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD);
431                                 }
432                         }
433                         MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
434         #else           
435                                         
436                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
437                         //if you don't need to fork anything
438                         if(processors == 1){
439                                 if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
440                                 else { driver(0, numSeqs, outputFile, "square");  }
441                         }else{ //you have multiple processors
442                                 
443                                 for (int i = 0; i < processors; i++) {
444                                         distlinePair tempLine;
445                                         lines.push_back(tempLine);
446                                         if (output != "square") {
447                                                 lines[i].start = int (sqrt(float(i)/float(processors)) * numSeqs);
448                                                 lines[i].end = int (sqrt(float(i+1)/float(processors)) * numSeqs);
449                                         }else{
450                                                 lines[i].start = int ((float(i)/float(processors)) * numSeqs);
451                                                 lines[i].end = int ((float(i+1)/float(processors)) * numSeqs);
452                                         }
453                                 }
454                                 
455                                 createProcesses(outputFile); 
456                         }
457                 #else
458                         //ifstream inFASTA;
459                         if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
460                         else { driver(0, numSeqs, outputFile, "square");  }
461                 #endif
462                 
463         #endif
464                         if (m->control_pressed) { outputTypes.clear();  delete distCalculator; remove(outputFile.c_str()); return 0; }
465                         
466                         #ifdef USE_MPI
467                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
468                                                 
469                                 if (pid == 0) { //only one process should output to screen
470                         #endif
471                         
472                         ifstream fileHandle;
473                         fileHandle.open(outputFile.c_str());
474                         if(fileHandle) {
475                                 m->gobble(fileHandle);
476                                 if (fileHandle.eof()) { m->mothurOut(outputFile + " is blank. This can result if there are no distances below your cutoff.");  m->mothurOutEndLine(); }
477                         }
478                         
479                         if (compress) {
480                                 m->mothurOut("Compressing..."); m->mothurOutEndLine();
481                                 m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)"); m->mothurOutEndLine();
482                                 system(("gzip -v " + outputFile).c_str());
483                                 outputNames.push_back(outputFile + ".gz");
484                         }else { outputNames.push_back(outputFile); }
485                         
486                         #ifdef USE_MPI
487                                 }
488                         #endif
489                         
490                         m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
491                         
492                         if (m->control_pressed) { outputTypes.clear();  delete distCalculator; remove(outputFile.c_str()); return 0; }
493                 }
494                         
495                 delete distCalculator;
496                 
497                 m->mothurOutEndLine();
498                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
499                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
500                 m->mothurOutEndLine();
501                 
502
503                 return 0;
504         }
505         catch(exception& e) {
506                 m->errorOut(e, "PairwiseSeqsCommand", "execute");
507                 exit(1);
508         }
509 }
510
511 /**************************************************************************************************/
512 void PairwiseSeqsCommand::createProcesses(string filename) {
513         try {
514 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
515                 int process = 1;
516                 processIDS.clear();
517                 
518                 //loop through and create all the processes you want
519                 while (process != processors) {
520                         int pid = fork();
521                         
522                         if (pid > 0) {
523                                 processIDS.push_back(pid); 
524                                 process++;
525                         }else if (pid == 0){
526                                 if (output != "square") {  driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", cutoff); }
527                                 else { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", "square"); }
528                                 exit(0);
529                         }else { 
530                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
531                                 for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; kill (temp, SIGINT); }
532                                 exit(0);
533                         }
534                 }
535                 
536                 //parent do my part
537                 if (output != "square") {  driver(lines[0].start, lines[0].end, filename, cutoff); }
538                 else { driver(lines[0].start, lines[0].end, filename, "square"); }
539
540                 
541                 //force parent to wait until all the processes are done
542                 for (int i=0;i<processIDS.size();i++) { 
543                         int temp = processIDS[i];
544                         wait(&temp);
545                 }
546                 
547                 //append and remove temp files
548                 for (int i=0;i<processIDS.size();i++) { 
549                         m->appendFiles((filename + toString(processIDS[i]) + ".temp"), filename);
550                         remove((filename + toString(processIDS[i]) + ".temp").c_str());
551                 }
552 #endif
553         }
554         catch(exception& e) {
555                 m->errorOut(e, "PairwiseSeqsCommand", "createProcesses");
556                 exit(1);
557         }
558 }
559
560 /**************************************************************************************************/
561 /////// need to fix to work with calcs and sequencedb
562 int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, float cutoff){
563         try {
564
565                 int startTime = time(NULL);
566                 
567                 //column file
568                 ofstream outFile(dFileName.c_str(), ios::trunc);
569                 outFile.setf(ios::fixed, ios::showpoint);
570                 outFile << setprecision(4);
571                 
572                 if((output == "lt") && startLine == 0){ outFile << alignDB.getNumSeqs() << endl;        }
573                 
574                 for(int i=startLine;i<endLine;i++){
575                         if(output == "lt")      {       
576                                 string name = alignDB.get(i).getName();
577                                 if (name.length() < 10) { //pad with spaces to make compatible
578                                         while (name.length() < 10) {  name += " ";  }
579                                 }
580                                 outFile << name << '\t';        
581                         }
582                         
583                         for(int j=0;j<i;j++){
584                                 
585                                 if (m->control_pressed) { outFile.close(); return 0;  }
586                                 
587                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
588                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
589                                 }
590                                 
591                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
592                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
593                                 }
594                                 
595                                 Sequence* seqI = new Sequence(alignDB.get(i).getName(), alignDB.get(i).getAligned());
596                                 Sequence* seqJ = new Sequence(alignDB.get(j).getName(), alignDB.get(j).getAligned());
597                                 
598                                 Nast(alignment, seqI, seqJ);
599                                 
600                                 distCalculator->calcDist(*seqI, *seqJ);
601                                 double dist = distCalculator->getDist();
602                                 
603                                 delete seqI; delete seqJ;
604                                 
605                                 if(dist <= cutoff){
606                                         if (output == "column") { outFile << alignDB.get(i).getName() << ' ' << alignDB.get(j).getName() << ' ' << dist << endl; }
607                                 }
608                                 if (output == "lt") {  outFile << dist << '\t'; }
609                         }
610                         
611                         if (output == "lt") { outFile << endl; }
612                         
613                         if(i % 100 == 0){
614                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
615                         }
616                         
617                 }
618                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
619                 
620                 outFile.close();
621                 
622                 return 1;
623         }
624         catch(exception& e) {
625                 m->errorOut(e, "PairwiseSeqsCommand", "driver");
626                 exit(1);
627         }
628 }
629 /**************************************************************************************************/
630 /////// need to fix to work with calcs and sequencedb
631 int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, string square){
632         try {
633
634                 int startTime = time(NULL);
635                 
636                 //column file
637                 ofstream outFile(dFileName.c_str(), ios::trunc);
638                 outFile.setf(ios::fixed, ios::showpoint);
639                 outFile << setprecision(4);
640                 
641                 if(startLine == 0){     outFile << alignDB.getNumSeqs() << endl;        }
642                 
643                 for(int i=startLine;i<endLine;i++){
644                                 
645                         string name = alignDB.get(i).getName();
646                         //pad with spaces to make compatible
647                         if (name.length() < 10) { while (name.length() < 10) {  name += " ";  } }
648                                 
649                         outFile << name << '\t';        
650                         
651                         for(int j=0;j<alignDB.getNumSeqs();j++){
652                                 
653                                 if (m->control_pressed) { outFile.close(); return 0;  }
654                                 
655                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
656                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
657                                 }
658                                 
659                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
660                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
661                                 }
662                                 
663                                 Sequence* seqI = new Sequence(alignDB.get(i).getName(), alignDB.get(i).getAligned());
664                                 Sequence* seqJ = new Sequence(alignDB.get(j).getName(), alignDB.get(j).getAligned());
665                                 
666                                 Nast(alignment, seqI, seqJ);
667                                 
668                                 distCalculator->calcDist(*seqI, *seqJ);
669                                 double dist = distCalculator->getDist();
670                                 
671                                 delete seqI; delete seqJ;
672                                                                 
673                                 outFile << dist << '\t'; 
674                         }
675                         
676                         outFile << endl; 
677                         
678                         if(i % 100 == 0){
679                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
680                         }
681                         
682                 }
683                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
684                 
685                 outFile.close();
686                 
687                 return 1;
688         }
689         catch(exception& e) {
690                 m->errorOut(e, "PairwiseSeqsCommand", "driver");
691                 exit(1);
692         }
693 }
694 #ifdef USE_MPI
695 /**************************************************************************************************/
696 /////// need to fix to work with calcs and sequencedb
697 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, float cutoff){
698         try {
699                 MPI_Status status;
700                 int startTime = time(NULL);
701                 
702                 string outputString = "";
703                 
704                 for(int i=startLine;i<endLine;i++){
705         
706                         for(int j=0;j<i;j++){
707                                 
708                                 if (m->control_pressed) {  return 0;  }
709                                 
710                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
711                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
712                                 }
713                                 
714                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
715                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
716                                 }
717                                 
718                                 Sequence* seqI = new Sequence(alignDB.get(i).getName(), alignDB.get(i).getAligned());
719                                 Sequence* seqJ = new Sequence(alignDB.get(j).getName(), alignDB.get(j).getAligned());
720                                 
721                                 Nast(alignment, seqI, seqJ);
722                                 
723                                 distCalculator->calcDist(*seqI, *seqJ);
724                                 double dist = distCalculator->getDist();
725                                 
726                                 delete seqI; delete seqJ;
727                                 
728                                 if(dist <= cutoff){
729                                          outputString += (alignDB.get(i).getName() + ' ' + alignDB.get(j).getName() + ' ' + toString(dist) + '\n'); 
730                                 }
731                         }
732                         
733                         if(i % 100 == 0){
734                                 //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
735                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
736                         }
737                         
738                          
739                         //send results to parent
740                         int length = outputString.length();
741
742                         char* buf = new char[length];
743                         memcpy(buf, outputString.c_str(), length);
744                         
745                         MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
746                         outputString = "";
747                         delete buf;
748                         
749                 }
750                 
751                 return 1;
752         }
753         catch(exception& e) {
754                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
755                 exit(1);
756         }
757 }
758 /**************************************************************************************************/
759 /////// need to fix to work with calcs and sequencedb
760 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size){
761         try {
762                 MPI_Status status;
763                 
764                 MPI_File outMPI;
765                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
766
767                 char filename[1024];
768                 strcpy(filename, file.c_str());
769
770                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
771
772                 
773                 
774                 string outputString = "";
775                 size = 0;
776                 
777                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
778                 
779                 for(int i=startLine;i<endLine;i++){
780                                 
781                         string name = alignDB.get(i).getName();
782                         if (name.length() < 10) { //pad with spaces to make compatible
783                                 while (name.length() < 10) {  name += " ";  }
784                         }
785                         outputString += name + "\t";    
786                         
787                         for(int j=0;j<i;j++){
788                                 
789                                 if (m->control_pressed) {  return 0;  }
790                                 
791                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
792                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
793                                 }
794                                 
795                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
796                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
797                                 }
798                                 
799                                 Sequence* seqI = new Sequence(alignDB.get(i).getName(), alignDB.get(i).getAligned());
800                                 Sequence* seqJ = new Sequence(alignDB.get(j).getName(), alignDB.get(j).getAligned());
801                                 
802                                 Nast(alignment, seqI, seqJ);
803                                 
804                                 distCalculator->calcDist(*seqI, *seqJ);
805                                 double dist = distCalculator->getDist();
806                                 
807                                 delete seqI; delete seqJ;
808                                 
809                                 outputString += toString(dist) + "\t"; 
810                         }
811                         
812                         outputString += "\n"; 
813                         
814                         //send results to parent
815                         int length = outputString.length();
816                         char* buf = new char[length];
817                         memcpy(buf, outputString.c_str(), length);
818                         
819                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
820                         size += outputString.length();
821                         outputString = "";
822                         delete buf;
823                 }
824                 
825                 MPI_File_close(&outMPI);
826                 
827                 return 1;
828         }
829         catch(exception& e) {
830                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
831                 exit(1);
832         }
833 }
834 /**************************************************************************************************/
835 /////// need to fix to work with calcs and sequencedb
836 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size, string square){
837         try {
838                 MPI_Status status;
839                 
840                 MPI_File outMPI;
841                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
842
843                 char filename[1024];
844                 strcpy(filename, file.c_str());
845
846                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
847                 
848                 
849                 
850                 string outputString = "";
851                 size = 0;
852                 
853                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
854                 
855                 for(int i=startLine;i<endLine;i++){
856                                 
857                         string name = alignDB.get(i).getName();
858                         if (name.length() < 10) { //pad with spaces to make compatible
859                                 while (name.length() < 10) {  name += " ";  }
860                         }
861                         outputString += name + "\t";    
862                         
863                         for(int j=0;j<alignDB.getNumSeqs();j++){
864                                 
865                                 if (m->control_pressed) {  return 0;  }
866                                 
867                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
868                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
869                                 }
870                                 
871                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
872                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
873                                 }
874                                 
875                                 Sequence* seqI = new Sequence(alignDB.get(i).getName(), alignDB.get(i).getAligned());
876                                 Sequence* seqJ = new Sequence(alignDB.get(j).getName(), alignDB.get(j).getAligned());
877                                 
878                                 Nast(alignment, seqI, seqJ);
879                                 
880                                 distCalculator->calcDist(*seqI, *seqJ);
881                                 double dist = distCalculator->getDist();
882                                 
883                                 delete seqI; delete seqJ;
884                                 
885                                 outputString += toString(dist) + "\t"; 
886                         }
887                         
888                         outputString += "\n"; 
889
890                         //send results to parent
891                         int length = outputString.length();
892                         char* buf = new char[length];
893                         memcpy(buf, outputString.c_str(), length);
894                         
895                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
896                         size += outputString.length();
897                         outputString = "";
898                         delete buf;
899                 }
900                 
901                 MPI_File_close(&outMPI);
902                 
903                 return 1;
904         }
905         catch(exception& e) {
906                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
907                 exit(1);
908         }
909 }
910 #endif
911 /**************************************************************************************************/
912