]> git.donarmstrong.com Git - mothur.git/blob - pairwiseseqscommand.cpp
random chagnes from pat
[mothur.git] / pairwiseseqscommand.cpp
1 /*
2  *  pairwiseseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/20/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "pairwiseseqscommand.h"
11 #include "sequence.hpp"
12
13 #include "gotohoverlap.hpp"
14 #include "needlemanoverlap.hpp"
15 #include "blastalign.hpp"
16 #include "noalign.hpp"
17
18 #include "ignoregaps.h"
19 #include "eachgapdist.h"
20 #include "eachgapignore.h"
21 #include "onegapdist.h"
22 #include "onegapignore.h"
23
24
25 //**********************************************************************************************************************
26 vector<string> PairwiseSeqsCommand::getValidParameters(){       
27         try {
28                 string AlignArray[] =  {"fasta","align","match","mismatch","gapopen","gapextend", "processors","calc","compress","cutoff","countends","output","outputdir","inputdir"};
29                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
30                 return myArray;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "PairwiseSeqsCommand", "getValidParameters");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 vector<string> PairwiseSeqsCommand::getRequiredParameters(){    
39         try {
40                 string AlignArray[] =  {"fasta"};
41                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
42                 return myArray;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "PairwiseSeqsCommand", "getRequiredParameters");
46                 exit(1);
47         }
48 }
49 //**********************************************************************************************************************
50 vector<string> PairwiseSeqsCommand::getRequiredFiles(){ 
51         try {
52                 vector<string> myArray;
53                 return myArray;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "PairwiseSeqsCommand", "getRequiredFiles");
57                 exit(1);
58         }
59 }
60 //**********************************************************************************************************************
61 PairwiseSeqsCommand::PairwiseSeqsCommand(){     
62         try {
63                 abort = true; calledHelp = true; 
64                 vector<string> tempOutNames;
65                 outputTypes["phylip"] = tempOutNames;
66                 outputTypes["column"] = tempOutNames;
67         }
68         catch(exception& e) {
69                 m->errorOut(e, "PairwiseSeqsCommand", "PairwiseSeqsCommand");
70                 exit(1);
71         }
72 }
73 //**********************************************************************************************************************
74 PairwiseSeqsCommand::PairwiseSeqsCommand(string option)  {
75         try {
76                 abort = false; calledHelp = false;   
77         
78                 //allow user to run help
79                 if(option == "help") { help(); abort = true; calledHelp = true; }
80                 
81                 else {
82                         
83                         //valid paramters for this command
84                         string AlignArray[] =  {"fasta","align","match","mismatch","gapopen","gapextend", "processors","cutoff","compress","calc","countends","output","outputdir","inputdir"};
85                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
86                         
87                         OptionParser parser(option);
88                         map<string, string> parameters = parser.getParameters(); 
89                         
90                         ValidParameters validParameter("pairwise.seqs");
91                         map<string, string>::iterator it;
92                         
93                         //check to make sure all parameters are valid for command
94                         for (it = parameters.begin(); it != parameters.end(); it++) { 
95                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
96                         }
97                         
98                         //initialize outputTypes
99                         vector<string> tempOutNames;
100                         outputTypes["phylip"] = tempOutNames;
101                         outputTypes["column"] = tempOutNames;
102                         
103                         //if the user changes the output directory command factory will send this info to us in the output parameter 
104                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
105                         
106
107                         //if the user changes the input directory command factory will send this info to us in the output parameter 
108                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
109                         
110                         if (inputDir == "not found"){   inputDir = "";          }
111
112                         fastaFileName = validParameter.validFile(parameters, "fasta", false);
113                         if (fastaFileName == "not found") { m->mothurOut("fasta is a required parameter for the pairwise.seqs command."); m->mothurOutEndLine(); abort = true;  }
114                         else { 
115                                 m->splitAtDash(fastaFileName, fastaFileNames);
116                                 
117                                 //go through files and make sure they are good, if not, then disregard them
118                                 for (int i = 0; i < fastaFileNames.size(); i++) {
119                                         
120                                         if (inputDir != "") {
121                                                 string path = m->hasPath(fastaFileNames[i]);
122                                                 //if the user has not given a path then, add inputdir. else leave path alone.
123                                                 if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
124                                         }
125         
126                                         int ableToOpen;
127                                         ifstream in;
128
129                                         ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
130                                 
131                                         //if you can't open it, try default location
132                                         if (ableToOpen == 1) {
133                                                 if (m->getDefaultPath() != "") { //default path is set
134                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
135                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
136                                                         ifstream in2;
137                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
138                                                         in2.close();
139                                                         fastaFileNames[i] = tryPath;
140                                                 }
141                                         }
142                                         
143                                         //if you can't open it, try output location
144                                         if (ableToOpen == 1) {
145                                                 if (m->getOutputDir() != "") { //default path is set
146                                                         string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
147                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
148                                                         ifstream in2;
149                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
150                                                         in2.close();
151                                                         fastaFileNames[i] = tryPath;
152                                                 }
153                                         }
154                                         
155                                         in.close();                                     
156
157                                         if (ableToOpen == 1) { 
158                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
159                                                 //erase from file list
160                                                 fastaFileNames.erase(fastaFileNames.begin()+i);
161                                                 i--;
162                                         }
163                                 }
164                                 
165                                 //make sure there is at least one valid file left
166                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
167                         }
168                 
169                         //check for optional parameter and set defaults
170                         // ...at some point should added some additional type checking...
171                         string temp;
172                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
173                         convert(temp, match);  
174                         
175                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
176                         convert(temp, misMatch);  
177                         
178                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
179                         convert(temp, gapOpen);  
180                         
181                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
182                         convert(temp, gapExtend); 
183                         
184                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
185                         convert(temp, processors); 
186                         
187                         temp = validParameter.validFile(parameters, "cutoff", false);           if(temp == "not found"){        temp = "1.0"; }
188                         convert(temp, cutoff); 
189                         
190                         temp = validParameter.validFile(parameters, "countends", false);        if(temp == "not found"){        temp = "T";     }
191                         countends = m->isTrue(temp); 
192                         
193                         temp = validParameter.validFile(parameters, "compress", false);         if(temp == "not found"){  temp = "F"; }
194                         compress = m->isTrue(temp); 
195                         
196                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
197                         
198                         output = validParameter.validFile(parameters, "output", false);         if(output == "not found"){      output = "column"; }
199                         if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; }
200                         
201                         calc = validParameter.validFile(parameters, "calc", false);                     
202                         if (calc == "not found") { calc = "onegap";  }
203                         else { 
204                                  if (calc == "default")  {  calc = "onegap";  }
205                         }
206                         m->splitAtDash(calc, Estimators);
207                         
208                         ValidCalculators validCalculator;
209                         if (countends) {
210                                 for (int i=0; i<Estimators.size(); i++) {
211                                         if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
212                                                 if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
213                                                 else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
214                                                 else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
215                                         }
216                                 }
217                         }else {
218                                 for (int i=0; i<Estimators.size(); i++) {
219                                         if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
220                                                 if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
221                                                 else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
222                                                 else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
223                                         }
224                                 }
225                         }
226                 }
227                 
228         }
229         catch(exception& e) {
230                 m->errorOut(e, "PairwiseSeqsCommand", "PairwiseSeqsCommand");
231                 exit(1);
232         }
233 }
234 //**********************************************************************************************************************
235 PairwiseSeqsCommand::~PairwiseSeqsCommand(){}
236 //**********************************************************************************************************************
237
238 void PairwiseSeqsCommand::help(){
239         try {
240                 m->mothurOut("The pairwise.seqs command reads a fasta file and creates distance matrix.\n");
241                 m->mothurOut("The pairwise.seqs command parameters are fasta, align, match, mismatch, gapopen, gapextend, calc, output, cutoff and processors.\n");
242                 m->mothurOut("The fasta parameter is required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n");
243                 m->mothurOut("The align parameter allows you to specify the alignment method to use.  Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n");
244                 m->mothurOut("The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n");
245                 m->mothurOut("The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n");
246                 m->mothurOut("The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n");
247                 m->mothurOut("The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n");
248                 m->mothurOut("The calc parameter allows you to specify the method of calculating the distances.  Your options are: nogaps, onegap or eachgap. The default is onegap.\n");
249                 m->mothurOut("The countends parameter allows you to specify whether to include terminal gaps in distance.  Your options are: T or F. The default is T.\n");
250                 m->mothurOut("The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0.\n");
251                 m->mothurOut("The output parameter allows you to specify format of your distance matrix. Options are column, lt, and square. The default is column.\n");
252                 m->mothurOut("The compress parameter allows you to indicate that you want the resulting distance file compressed.  The default is false.\n");
253                 m->mothurOut("The pairwise.seqs command should be in the following format: \n");
254                 m->mothurOut("pairwise.seqs(fasta=yourfastaFile, align=yourAlignmentMethod) \n");
255                 m->mothurOut("Example pairwise.seqs(fasta=candidate.fasta, align=blast)\n");
256                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
257         }
258         catch(exception& e) {
259                 m->errorOut(e, "PairwiseSeqsCommand", "help");
260                 exit(1);
261         }
262 }
263
264
265 //**********************************************************************************************************************
266
267 int PairwiseSeqsCommand::execute(){
268         try {
269                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
270                 
271                 int longestBase = 2000; //will need to update this in driver if we find sequences with more bases.  hardcoded so we don't have the pre-read user fasta file.
272                 
273                 if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
274                 else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
275                 else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
276                 else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
277                 else {
278                         m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
279                         m->mothurOutEndLine();
280                         alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
281                 }
282                 
283                 cutoff += 0.005;
284                 
285                 for (int s = 0; s < fastaFileNames.size(); s++) {
286                         if (m->control_pressed) { outputTypes.clear(); return 0; }
287                         
288                         m->mothurOut("Processing sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
289                         
290                         if (outputDir == "") {  outputDir += m->hasPath(fastaFileNames[s]); }
291                         
292                         ifstream inFASTA;
293                         m->openInputFile(fastaFileNames[s], inFASTA);
294                         alignDB = SequenceDB(inFASTA); 
295                         inFASTA.close();
296                         
297                         int numSeqs = alignDB.getNumSeqs();
298                         int startTime = time(NULL);
299                         string outputFile = "";
300                                 
301                         if (output == "lt") { //does the user want lower triangle phylip formatted file 
302                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "phylip.dist";
303                                 remove(outputFile.c_str()); outputTypes["phylip"].push_back(outputFile);
304                         }else if (output == "column") { //user wants column format
305                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "dist";
306                                 outputTypes["column"].push_back(outputFile);
307                                 remove(outputFile.c_str());
308                         }else { //assume square
309                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "square.dist";
310                                 remove(outputFile.c_str());
311                                 outputTypes["phylip"].push_back(outputFile);
312                         }
313                         
314                         #ifdef USE_MPI
315                 
316                         int pid, start, end; 
317                         int tag = 2001;
318                                         
319                         MPI_Status status; 
320                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
321                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
322                         
323                         //each process gets where it should start and stop in the file
324                         if (output != "square") {
325                                 start = int (sqrt(float(pid)/float(processors)) * numSeqs);
326                                 end = int (sqrt(float(pid+1)/float(processors)) * numSeqs);
327                         }else{
328                                 start = int ((float(pid)/float(processors)) * numSeqs);
329                                 end = int ((float(pid+1)/float(processors)) * numSeqs);
330                         }
331                         
332                         if (output == "column") {
333                                 MPI_File outMPI;
334                                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
335
336                         char filename[1024];
337                                 strcpy(filename, outputFile.c_str());
338                                 
339                                 MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI);
340
341                                 if (pid == 0) { //you are the root process 
342                                 
343                                         //do your part
344                                         string outputMyPart;
345                                         
346                                         driverMPI(start, end, outMPI, cutoff); 
347                                         
348                                         if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI);  remove(outputFile.c_str()); delete distCalculator;  return 0; }
349                                 
350                                         //wait on chidren
351                                         for(int i = 1; i < processors; i++) { 
352                                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   remove(outputFile.c_str()); delete distCalculator;  return 0; }
353                                                 
354                                                 char buf[5];
355                                                 MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
356                                         }
357                                 }else { //you are a child process
358                                         //do your part
359                                         driverMPI(start, end, outMPI, cutoff); 
360                                         
361                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  remove(outputFile.c_str()); delete distCalculator;  return 0; }
362                                 
363                                         char buf[5];
364                                         strcpy(buf, "done"); 
365                                         //tell parent you are done.
366                                         MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
367                                 }
368                                 
369                                 MPI_File_close(&outMPI);
370                                 
371                         }else { //lower triangle format
372                                 if (pid == 0) { //you are the root process 
373                                 
374                                         //do your part
375                                         string outputMyPart;
376                                         unsigned long int mySize;
377                                         
378                                         if (output != "square"){ driverMPI(start, end, outputFile, mySize); }
379                                         else { driverMPI(start, end, outputFile, mySize, output); }
380                 
381                                         if (m->control_pressed) {  outputTypes.clear();   remove(outputFile.c_str()); delete distCalculator;  return 0; }
382                                         
383                                         int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; //
384                                         MPI_File outMPI;
385                                         MPI_File inMPI;
386
387                                         char filename[1024];
388                                         strcpy(filename, outputFile.c_str());
389
390                                         MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
391
392                                         //wait on chidren
393                                         for(int b = 1; b < processors; b++) { 
394                                                 unsigned long int fileSize;
395                                                 
396                                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  remove(outputFile.c_str());  delete distCalculator;  return 0; }
397                                                 
398                                                 MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); 
399                                                 
400                                                 string outTemp = outputFile + toString(b) + ".temp";
401
402                                                 char* buf = new char[outTemp.length()];
403                                                 memcpy(buf, outTemp.c_str(), outTemp.length());
404                                                 
405                                                 MPI_File_open(MPI_COMM_SELF, buf, MPI_MODE_DELETE_ON_CLOSE|MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);
406                                                 delete buf;
407
408                                                 int count = 0;
409                                                 while (count < fileSize) { 
410                                                         char buf2[1];
411                                                         MPI_File_read(inMPI, buf2, 1, MPI_CHAR, &status);
412                                                         MPI_File_write(outMPI, buf2, 1, MPI_CHAR, &status);
413                                                         count += 1;
414                                                 }
415                                                 
416                                                 MPI_File_close(&inMPI); //deleted on close
417                                         }
418                                         
419                                         MPI_File_close(&outMPI);
420                                 }else { //you are a child process
421                                         //do your part
422                                         unsigned long int size;
423                                         if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); }
424                                         else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); }
425                                         
426                                         if (m->control_pressed) { delete distCalculator;  return 0; }
427                                 
428                                         //tell parent you are done.
429                                         MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD);
430                                 }
431                         }
432                         MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
433         #else           
434                                         
435                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
436                         //if you don't need to fork anything
437                         if(processors == 1){
438                                 if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
439                                 else { driver(0, numSeqs, outputFile, "square");  }
440                         }else{ //you have multiple processors
441                                 
442                                 for (int i = 0; i < processors; i++) {
443                                         distlinePair tempLine;
444                                         lines.push_back(tempLine);
445                                         if (output != "square") {
446                                                 lines[i].start = int (sqrt(float(i)/float(processors)) * numSeqs);
447                                                 lines[i].end = int (sqrt(float(i+1)/float(processors)) * numSeqs);
448                                         }else{
449                                                 lines[i].start = int ((float(i)/float(processors)) * numSeqs);
450                                                 lines[i].end = int ((float(i+1)/float(processors)) * numSeqs);
451                                         }
452                                 }
453                                 
454                                 createProcesses(outputFile); 
455                         }
456                 #else
457                         //ifstream inFASTA;
458                         if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
459                         else { driver(0, numSeqs, outputFile, "square");  }
460                 #endif
461                 
462         #endif
463                         if (m->control_pressed) { outputTypes.clear();  delete distCalculator; remove(outputFile.c_str()); return 0; }
464                         
465                         #ifdef USE_MPI
466                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
467                                                 
468                                 if (pid == 0) { //only one process should output to screen
469                         #endif
470                         
471                         ifstream fileHandle;
472                         fileHandle.open(outputFile.c_str());
473                         if(fileHandle) {
474                                 m->gobble(fileHandle);
475                                 if (fileHandle.eof()) { m->mothurOut(outputFile + " is blank. This can result if there are no distances below your cutoff.");  m->mothurOutEndLine(); }
476                         }
477                         
478                         if (compress) {
479                                 m->mothurOut("Compressing..."); m->mothurOutEndLine();
480                                 m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)"); m->mothurOutEndLine();
481                                 system(("gzip -v " + outputFile).c_str());
482                                 outputNames.push_back(outputFile + ".gz");
483                         }else { outputNames.push_back(outputFile); }
484                         
485                         #ifdef USE_MPI
486                                 }
487                         #endif
488                         
489                         m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
490                         
491                         if (m->control_pressed) { outputTypes.clear();  delete distCalculator; remove(outputFile.c_str()); return 0; }
492                 }
493                         
494                 delete distCalculator;
495                 
496                 //set phylip file as new current phylipfile
497                 string current = "";
498                 itTypes = outputTypes.find("phylip");
499                 if (itTypes != outputTypes.end()) {
500                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
501                 }
502                 
503                 //set column file as new current columnfile
504                 itTypes = outputTypes.find("column");
505                 if (itTypes != outputTypes.end()) {
506                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
507                 }
508                 
509                 m->mothurOutEndLine();
510                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
511                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
512                 m->mothurOutEndLine();
513                 
514
515                 return 0;
516         }
517         catch(exception& e) {
518                 m->errorOut(e, "PairwiseSeqsCommand", "execute");
519                 exit(1);
520         }
521 }
522
523 /**************************************************************************************************/
524 void PairwiseSeqsCommand::createProcesses(string filename) {
525         try {
526 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
527                 int process = 1;
528                 processIDS.clear();
529                 
530                 //loop through and create all the processes you want
531                 while (process != processors) {
532                         int pid = fork();
533                         
534                         if (pid > 0) {
535                                 processIDS.push_back(pid); 
536                                 process++;
537                         }else if (pid == 0){
538                                 if (output != "square") {  driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", cutoff); }
539                                 else { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", "square"); }
540                                 exit(0);
541                         }else { 
542                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
543                                 for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; kill (temp, SIGINT); }
544                                 exit(0);
545                         }
546                 }
547                 
548                 //parent do my part
549                 if (output != "square") {  driver(lines[0].start, lines[0].end, filename, cutoff); }
550                 else { driver(lines[0].start, lines[0].end, filename, "square"); }
551
552                 
553                 //force parent to wait until all the processes are done
554                 for (int i=0;i<processIDS.size();i++) { 
555                         int temp = processIDS[i];
556                         wait(&temp);
557                 }
558                 
559                 //append and remove temp files
560                 for (int i=0;i<processIDS.size();i++) { 
561                         m->appendFiles((filename + toString(processIDS[i]) + ".temp"), filename);
562                         remove((filename + toString(processIDS[i]) + ".temp").c_str());
563                 }
564 #endif
565         }
566         catch(exception& e) {
567                 m->errorOut(e, "PairwiseSeqsCommand", "createProcesses");
568                 exit(1);
569         }
570 }
571
572 /**************************************************************************************************/
573 /////// need to fix to work with calcs and sequencedb
574 int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, float cutoff){
575         try {
576
577                 int startTime = time(NULL);
578                 
579                 //column file
580                 ofstream outFile(dFileName.c_str(), ios::trunc);
581                 outFile.setf(ios::fixed, ios::showpoint);
582                 outFile << setprecision(4);
583                 
584                 if((output == "lt") && startLine == 0){ outFile << alignDB.getNumSeqs() << endl;        }
585                 
586                 for(int i=startLine;i<endLine;i++){
587                         if(output == "lt")      {       
588                                 string name = alignDB.get(i).getName();
589                                 if (name.length() < 10) { //pad with spaces to make compatible
590                                         while (name.length() < 10) {  name += " ";  }
591                                 }
592                                 outFile << name << '\t';        
593                         }
594                         
595                         for(int j=0;j<i;j++){
596                                 
597                                 if (m->control_pressed) { outFile.close(); return 0;  }
598                                 
599                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
600                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
601                                 }
602                                 
603                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
604                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
605                                 }
606                                 
607                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
608                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
609                                 
610                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
611                                 seqI.setAligned(alignment->getSeqAAln());
612                                 seqJ.setAligned(alignment->getSeqBAln());
613
614                                 
615                                 distCalculator->calcDist(seqI, seqJ);
616                                 double dist = distCalculator->getDist();
617                                 
618                                 if(dist <= cutoff){
619                                         if (output == "column") { outFile << alignDB.get(i).getName() << ' ' << alignDB.get(j).getName() << ' ' << dist << endl; }
620                                 }
621                                 if (output == "lt") {  outFile << dist << '\t'; }
622                         }
623                         
624                         if (output == "lt") { outFile << endl; }
625                         
626                         if(i % 100 == 0){
627                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
628                         }
629                         
630                 }
631                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
632                 
633                 outFile.close();
634                 
635                 return 1;
636         }
637         catch(exception& e) {
638                 m->errorOut(e, "PairwiseSeqsCommand", "driver");
639                 exit(1);
640         }
641 }
642 /**************************************************************************************************/
643 /////// need to fix to work with calcs and sequencedb
644 int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, string square){
645         try {
646
647                 int startTime = time(NULL);
648                 
649                 //column file
650                 ofstream outFile(dFileName.c_str(), ios::trunc);
651                 outFile.setf(ios::fixed, ios::showpoint);
652                 outFile << setprecision(4);
653                 
654                 if(startLine == 0){     outFile << alignDB.getNumSeqs() << endl;        }
655                 
656                 for(int i=startLine;i<endLine;i++){
657                                 
658                         string name = alignDB.get(i).getName();
659                         //pad with spaces to make compatible
660                         if (name.length() < 10) { while (name.length() < 10) {  name += " ";  } }
661                                 
662                         outFile << name << '\t';        
663                         
664                         for(int j=0;j<alignDB.getNumSeqs();j++){
665                                 
666                                 if (m->control_pressed) { outFile.close(); return 0;  }
667                                 
668                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
669                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
670                                 }
671                                 
672                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
673                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
674                                 }
675                                 
676                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
677                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
678                                 
679                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
680                                 seqI.setAligned(alignment->getSeqAAln());
681                                 seqJ.setAligned(alignment->getSeqBAln());
682                                 
683                                 distCalculator->calcDist(seqI, seqJ);
684                                 double dist = distCalculator->getDist();
685                                                                 
686                                 outFile << dist << '\t'; 
687                         }
688                         
689                         outFile << endl; 
690                         
691                         if(i % 100 == 0){
692                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
693                         }
694                         
695                 }
696                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
697                 
698                 outFile.close();
699                 
700                 return 1;
701         }
702         catch(exception& e) {
703                 m->errorOut(e, "PairwiseSeqsCommand", "driver");
704                 exit(1);
705         }
706 }
707 #ifdef USE_MPI
708 /**************************************************************************************************/
709 /////// need to fix to work with calcs and sequencedb
710 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, float cutoff){
711         try {
712                 MPI_Status status;
713                 int startTime = time(NULL);
714                 
715                 string outputString = "";
716                 
717                 for(int i=startLine;i<endLine;i++){
718         
719                         for(int j=0;j<i;j++){
720                                 
721                                 if (m->control_pressed) {  return 0;  }
722                                 
723                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
724                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
725                                 }
726                                 
727                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
728                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
729                                 }
730                                 
731                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
732                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
733                                 
734                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
735                                 seqI.setAligned(alignment->getSeqAAln());
736                                 seqJ.setAligned(alignment->getSeqBAln());
737                                 
738                                 distCalculator->calcDist(seqI, seqJ);
739                                 double dist = distCalculator->getDist();
740                                 
741                                 if(dist <= cutoff){
742                                          outputString += (alignDB.get(i).getName() + ' ' + alignDB.get(j).getName() + ' ' + toString(dist) + '\n'); 
743                                 }
744                         }
745                         
746                         if(i % 100 == 0){
747                                 //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
748                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
749                         }
750                         
751                          
752                         //send results to parent
753                         int length = outputString.length();
754
755                         char* buf = new char[length];
756                         memcpy(buf, outputString.c_str(), length);
757                         
758                         MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
759                         outputString = "";
760                         delete buf;
761                         
762                 }
763                 
764                 return 1;
765         }
766         catch(exception& e) {
767                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
768                 exit(1);
769         }
770 }
771 /**************************************************************************************************/
772 /////// need to fix to work with calcs and sequencedb
773 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size){
774         try {
775                 MPI_Status status;
776                 
777                 MPI_File outMPI;
778                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
779
780                 char filename[1024];
781                 strcpy(filename, file.c_str());
782
783                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
784
785                 
786                 
787                 string outputString = "";
788                 size = 0;
789                 
790                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
791                 
792                 for(int i=startLine;i<endLine;i++){
793                                 
794                         string name = alignDB.get(i).getName();
795                         if (name.length() < 10) { //pad with spaces to make compatible
796                                 while (name.length() < 10) {  name += " ";  }
797                         }
798                         outputString += name + "\t";    
799                         
800                         for(int j=0;j<i;j++){
801                                 
802                                 if (m->control_pressed) {  return 0;  }
803                                 
804                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
805                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
806                                 }
807                                 
808                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
809                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
810                                 }
811                                 
812                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
813                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
814                                 
815                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
816                                 seqI.setAligned(alignment->getSeqAAln());
817                                 seqJ.setAligned(alignment->getSeqBAln());
818                                 
819                                 distCalculator->calcDist(seqI, seqJ);
820                                 double dist = distCalculator->getDist();
821                                 
822                                 outputString += toString(dist) + "\t"; 
823                         }
824                         
825                         outputString += "\n"; 
826                         
827                         //send results to parent
828                         int length = outputString.length();
829                         char* buf = new char[length];
830                         memcpy(buf, outputString.c_str(), length);
831                         
832                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
833                         size += outputString.length();
834                         outputString = "";
835                         delete buf;
836                 }
837                 
838                 MPI_File_close(&outMPI);
839                 
840                 return 1;
841         }
842         catch(exception& e) {
843                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
844                 exit(1);
845         }
846 }
847 /**************************************************************************************************/
848 /////// need to fix to work with calcs and sequencedb
849 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size, string square){
850         try {
851                 MPI_Status status;
852                 
853                 MPI_File outMPI;
854                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
855
856                 char filename[1024];
857                 strcpy(filename, file.c_str());
858
859                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
860                 
861                 
862                 
863                 string outputString = "";
864                 size = 0;
865                 
866                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
867                 
868                 for(int i=startLine;i<endLine;i++){
869                                 
870                         string name = alignDB.get(i).getName();
871                         if (name.length() < 10) { //pad with spaces to make compatible
872                                 while (name.length() < 10) {  name += " ";  }
873                         }
874                         outputString += name + "\t";    
875                         
876                         for(int j=0;j<alignDB.getNumSeqs();j++){
877                                 
878                                 if (m->control_pressed) {  return 0;  }
879                                 
880                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
881                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
882                                 }
883                                 
884                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
885                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
886                                 }
887                                 
888                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
889                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
890                                 
891                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
892                                 seqI.setAligned(alignment->getSeqAAln());
893                                 seqJ.setAligned(alignment->getSeqBAln());
894                                 
895                                 distCalculator->calcDist(seqI, seqJ);
896                                 double dist = distCalculator->getDist();
897                                 
898                                 outputString += toString(dist) + "\t"; 
899                         }
900                         
901                         outputString += "\n"; 
902
903                         //send results to parent
904                         int length = outputString.length();
905                         char* buf = new char[length];
906                         memcpy(buf, outputString.c_str(), length);
907                         
908                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
909                         size += outputString.length();
910                         outputString = "";
911                         delete buf;
912                 }
913                 
914                 MPI_File_close(&outMPI);
915                 
916                 return 1;
917         }
918         catch(exception& e) {
919                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
920                 exit(1);
921         }
922 }
923 #endif
924 /**************************************************************************************************/
925