]> git.donarmstrong.com Git - mothur.git/blob - pairwiseseqscommand.cpp
rewrote metastats command in c++, added mothurRemove function to handle ~ error....
[mothur.git] / pairwiseseqscommand.cpp
1 /*
2  *  pairwiseseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/20/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "pairwiseseqscommand.h"
11 #include "sequence.hpp"
12
13 #include "gotohoverlap.hpp"
14 #include "needlemanoverlap.hpp"
15 #include "blastalign.hpp"
16 #include "noalign.hpp"
17
18 #include "ignoregaps.h"
19 #include "eachgapdist.h"
20 #include "eachgapignore.h"
21 #include "onegapdist.h"
22 #include "onegapignore.h"
23
24
25 //**********************************************************************************************************************
26 vector<string> PairwiseSeqsCommand::setParameters(){    
27         try {
28                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
29                 CommandParameter palign("align", "Multiple", "needleman-gotoh-blast-noalign", "needleman", "", "", "",false,false); parameters.push_back(palign);
30                 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch);
31                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
32                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
33                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
34                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
35                 CommandParameter poutput("output", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(poutput);
36                 CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "",false,false); parameters.push_back(pcalc);
37                 CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pcountends);
38                 CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcompress);
39                 CommandParameter pcutoff("cutoff", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pcutoff);
40                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
41                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
42                 
43                 vector<string> myArray;
44                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "PairwiseSeqsCommand", "setParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 string PairwiseSeqsCommand::getHelpString(){    
54         try {
55                 string helpString = "";
56                 helpString += "The pairwise.seqs command reads a fasta file and creates distance matrix.\n";
57                 helpString += "The pairwise.seqs command parameters are fasta, align, match, mismatch, gapopen, gapextend, calc, output, cutoff and processors.\n";
58                 helpString += "The fasta parameter is required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
59                 helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n";
60                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
61                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
62                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
63                 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
64                 helpString += "The calc parameter allows you to specify the method of calculating the distances.  Your options are: nogaps, onegap or eachgap. The default is onegap.\n";
65                 helpString += "The countends parameter allows you to specify whether to include terminal gaps in distance.  Your options are: T or F. The default is T.\n";
66                 helpString += "The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0.\n";
67                 helpString += "The output parameter allows you to specify format of your distance matrix. Options are column, lt, and square. The default is column.\n";
68                 helpString += "The compress parameter allows you to indicate that you want the resulting distance file compressed.  The default is false.\n";
69                 helpString += "The pairwise.seqs command should be in the following format: \n";
70                 helpString += "pairwise.seqs(fasta=yourfastaFile, align=yourAlignmentMethod) \n";
71                 helpString += "Example pairwise.seqs(fasta=candidate.fasta, align=blast)\n";
72                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
73                 return helpString;
74         }
75         catch(exception& e) {
76                 m->errorOut(e, "PairwiseSeqsCommand", "getHelpString");
77                 exit(1);
78         }
79 }
80
81 //**********************************************************************************************************************
82 PairwiseSeqsCommand::PairwiseSeqsCommand(){     
83         try {
84                 abort = true; calledHelp = true; 
85                 setParameters();
86                 vector<string> tempOutNames;
87                 outputTypes["phylip"] = tempOutNames;
88                 outputTypes["column"] = tempOutNames;
89         }
90         catch(exception& e) {
91                 m->errorOut(e, "PairwiseSeqsCommand", "PairwiseSeqsCommand");
92                 exit(1);
93         }
94 }
95 //**********************************************************************************************************************
96 PairwiseSeqsCommand::PairwiseSeqsCommand(string option)  {
97         try {
98                 abort = false; calledHelp = false;   
99         
100                 //allow user to run help
101                 if(option == "help") { help(); abort = true; calledHelp = true; }
102                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
103                 
104                 else {
105                         vector<string> myArray = setParameters();
106                         
107                         OptionParser parser(option);
108                         map<string, string> parameters = parser.getParameters(); 
109                         
110                         ValidParameters validParameter("pairwise.seqs");
111                         map<string, string>::iterator it;
112                         
113                         //check to make sure all parameters are valid for command
114                         for (it = parameters.begin(); it != parameters.end(); it++) { 
115                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
116                         }
117                         
118                         //initialize outputTypes
119                         vector<string> tempOutNames;
120                         outputTypes["phylip"] = tempOutNames;
121                         outputTypes["column"] = tempOutNames;
122                         
123                         //if the user changes the output directory command factory will send this info to us in the output parameter 
124                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
125                         
126
127                         //if the user changes the input directory command factory will send this info to us in the output parameter 
128                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
129                         
130                         if (inputDir == "not found"){   inputDir = "";          }
131
132                         fastaFileName = validParameter.validFile(parameters, "fasta", false);
133                         if (fastaFileName == "not found") {                             
134                                 //if there is a current fasta file, use it
135                                 string filename = m->getFastaFile(); 
136                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
137                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
138                         }
139                         else { 
140                                 m->splitAtDash(fastaFileName, fastaFileNames);
141                                 
142                                 //go through files and make sure they are good, if not, then disregard them
143                                 for (int i = 0; i < fastaFileNames.size(); i++) {
144                                         
145                                         bool ignore = false;
146                                         if (fastaFileNames[i] == "current") { 
147                                                 fastaFileNames[i] = m->getFastaFile(); 
148                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
149                                                 else {  
150                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
151                                                         //erase from file list
152                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
153                                                         i--;
154                                                 }
155                                         }
156                                         
157                                         if (!ignore) {
158                                         
159                                                 if (inputDir != "") {
160                                                         string path = m->hasPath(fastaFileNames[i]);
161                                                         //if the user has not given a path then, add inputdir. else leave path alone.
162                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
163                                                 }
164                 
165                                                 int ableToOpen;
166                                                 ifstream in;
167
168                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
169                                         
170                                                 //if you can't open it, try default location
171                                                 if (ableToOpen == 1) {
172                                                         if (m->getDefaultPath() != "") { //default path is set
173                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
174                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
175                                                                 ifstream in2;
176                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
177                                                                 in2.close();
178                                                                 fastaFileNames[i] = tryPath;
179                                                         }
180                                                 }
181                                                 
182                                                 //if you can't open it, try output location
183                                                 if (ableToOpen == 1) {
184                                                         if (m->getOutputDir() != "") { //default path is set
185                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
186                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
187                                                                 ifstream in2;
188                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
189                                                                 in2.close();
190                                                                 fastaFileNames[i] = tryPath;
191                                                         }
192                                                 }
193                                                 
194                                                 in.close();                                     
195
196                                                 if (ableToOpen == 1) { 
197                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
198                                                         //erase from file list
199                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
200                                                         i--;
201                                                 }else {
202                                                         m->setFastaFile(fastaFileNames[i]);
203                                                 }
204                                         }
205                                 }
206                                 
207                                 //make sure there is at least one valid file left
208                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
209                         }
210                 
211                         //check for optional parameter and set defaults
212                         // ...at some point should added some additional type checking...
213                         string temp;
214                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
215                         convert(temp, match);  
216                         
217                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
218                         convert(temp, misMatch);  
219                         
220                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
221                         convert(temp, gapOpen);  
222                         
223                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
224                         convert(temp, gapExtend); 
225                         
226                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
227                         m->setProcessors(temp);
228                         convert(temp, processors);
229                         
230                         temp = validParameter.validFile(parameters, "cutoff", false);           if(temp == "not found"){        temp = "1.0"; }
231                         convert(temp, cutoff); 
232                         
233                         temp = validParameter.validFile(parameters, "countends", false);        if(temp == "not found"){        temp = "T";     }
234                         countends = m->isTrue(temp); 
235                         
236                         temp = validParameter.validFile(parameters, "compress", false);         if(temp == "not found"){  temp = "F"; }
237                         compress = m->isTrue(temp); 
238                         
239                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
240                         
241                         output = validParameter.validFile(parameters, "output", false);         if(output == "not found"){      output = "column"; }
242                         if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; }
243                         
244                         calc = validParameter.validFile(parameters, "calc", false);                     
245                         if (calc == "not found") { calc = "onegap";  }
246                         else { 
247                                  if (calc == "default")  {  calc = "onegap";  }
248                         }
249                         m->splitAtDash(calc, Estimators);
250                         
251                         ValidCalculators validCalculator;
252                         if (countends) {
253                                 for (int i=0; i<Estimators.size(); i++) {
254                                         if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
255                                                 if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
256                                                 else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
257                                                 else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
258                                         }
259                                 }
260                         }else {
261                                 for (int i=0; i<Estimators.size(); i++) {
262                                         if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
263                                                 if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
264                                                 else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
265                                                 else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
266                                         }
267                                 }
268                         }
269                 }
270                 
271         }
272         catch(exception& e) {
273                 m->errorOut(e, "PairwiseSeqsCommand", "PairwiseSeqsCommand");
274                 exit(1);
275         }
276 }
277 //**********************************************************************************************************************
278
279 int PairwiseSeqsCommand::execute(){
280         try {
281                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
282                 
283                 int longestBase = 2000; //will need to update this in driver if we find sequences with more bases.  hardcoded so we don't have the pre-read user fasta file.
284                 
285                 if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
286                 else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
287                 else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
288                 else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
289                 else {
290                         m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
291                         m->mothurOutEndLine();
292                         alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
293                 }
294                 
295                 cutoff += 0.005;
296                 
297                 for (int s = 0; s < fastaFileNames.size(); s++) {
298                         if (m->control_pressed) { outputTypes.clear(); return 0; }
299                         
300                         m->mothurOut("Processing sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
301                         
302                         if (outputDir == "") {  outputDir += m->hasPath(fastaFileNames[s]); }
303                         
304                         ifstream inFASTA;
305                         m->openInputFile(fastaFileNames[s], inFASTA);
306                         alignDB = SequenceDB(inFASTA); 
307                         inFASTA.close();
308                         
309                         int numSeqs = alignDB.getNumSeqs();
310                         int startTime = time(NULL);
311                         string outputFile = "";
312                                 
313                         if (output == "lt") { //does the user want lower triangle phylip formatted file 
314                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "phylip.dist";
315                                 m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile);
316                         }else if (output == "column") { //user wants column format
317                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "dist";
318                                 outputTypes["column"].push_back(outputFile);
319                                 m->mothurRemove(outputFile);
320                         }else { //assume square
321                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "square.dist";
322                                 m->mothurRemove(outputFile);
323                                 outputTypes["phylip"].push_back(outputFile);
324                         }
325                         
326                         #ifdef USE_MPI
327                 
328                         int pid, start, end; 
329                         int tag = 2001;
330                                         
331                         MPI_Status status; 
332                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
333                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
334                         
335                         //each process gets where it should start and stop in the file
336                         if (output != "square") {
337                                 start = int (sqrt(float(pid)/float(processors)) * numSeqs);
338                                 end = int (sqrt(float(pid+1)/float(processors)) * numSeqs);
339                         }else{
340                                 start = int ((float(pid)/float(processors)) * numSeqs);
341                                 end = int ((float(pid+1)/float(processors)) * numSeqs);
342                         }
343                         
344                         if (output == "column") {
345                                 MPI_File outMPI;
346                                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
347
348                         char filename[1024];
349                                 strcpy(filename, outputFile.c_str());
350                                 
351                                 MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI);
352
353                                 if (pid == 0) { //you are the root process 
354                                 
355                                         //do your part
356                                         string outputMyPart;
357                                         
358                                         driverMPI(start, end, outMPI, cutoff); 
359                                         
360                                         if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI);  m->mothurRemove(outputFile); delete distCalculator;  return 0; }
361                                 
362                                         //wait on chidren
363                                         for(int i = 1; i < processors; i++) { 
364                                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   m->mothurRemove(outputFile); delete distCalculator;  return 0; }
365                                                 
366                                                 char buf[5];
367                                                 MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
368                                         }
369                                 }else { //you are a child process
370                                         //do your part
371                                         driverMPI(start, end, outMPI, cutoff); 
372                                         
373                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  m->mothurRemove(outputFile); delete distCalculator;  return 0; }
374                                 
375                                         char buf[5];
376                                         strcpy(buf, "done"); 
377                                         //tell parent you are done.
378                                         MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
379                                 }
380                                 
381                                 MPI_File_close(&outMPI);
382                                 
383                         }else { //lower triangle format
384                                 if (pid == 0) { //you are the root process 
385                                 
386                                         //do your part
387                                         string outputMyPart;
388                                         unsigned long int mySize;
389                                         
390                                         if (output != "square"){ driverMPI(start, end, outputFile, mySize); }
391                                         else { driverMPI(start, end, outputFile, mySize, output); }
392                 
393                                         if (m->control_pressed) {  outputTypes.clear();   m->mothurRemove(outputFile); delete distCalculator;  return 0; }
394                                         
395                                         int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; //
396                                         MPI_File outMPI;
397                                         MPI_File inMPI;
398
399                                         char filename[1024];
400                                         strcpy(filename, outputFile.c_str());
401
402                                         MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
403
404                                         //wait on chidren
405                                         for(int b = 1; b < processors; b++) { 
406                                                 unsigned long int fileSize;
407                                                 
408                                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  m->mothurRemove(outputFile);  delete distCalculator;  return 0; }
409                                                 
410                                                 MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); 
411                                                 
412                                                 string outTemp = outputFile + toString(b) + ".temp";
413
414                                                 char* buf = new char[outTemp.length()];
415                                                 memcpy(buf, outTemp.c_str(), outTemp.length());
416                                                 
417                                                 MPI_File_open(MPI_COMM_SELF, buf, MPI_MODE_DELETE_ON_CLOSE|MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);
418                                                 delete buf;
419
420                                                 int count = 0;
421                                                 while (count < fileSize) { 
422                                                         char buf2[1];
423                                                         MPI_File_read(inMPI, buf2, 1, MPI_CHAR, &status);
424                                                         MPI_File_write(outMPI, buf2, 1, MPI_CHAR, &status);
425                                                         count += 1;
426                                                 }
427                                                 
428                                                 MPI_File_close(&inMPI); //deleted on close
429                                         }
430                                         
431                                         MPI_File_close(&outMPI);
432                                 }else { //you are a child process
433                                         //do your part
434                                         unsigned long int size;
435                                         if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); }
436                                         else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); }
437                                         
438                                         if (m->control_pressed) { delete distCalculator;  return 0; }
439                                 
440                                         //tell parent you are done.
441                                         MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD);
442                                 }
443                         }
444                         MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
445         #else           
446                                         
447                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
448                         //if you don't need to fork anything
449                         if(processors == 1){
450                                 if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
451                                 else { driver(0, numSeqs, outputFile, "square");  }
452                         }else{ //you have multiple processors
453                                 
454                                 for (int i = 0; i < processors; i++) {
455                                         distlinePair tempLine;
456                                         lines.push_back(tempLine);
457                                         if (output != "square") {
458                                                 lines[i].start = int (sqrt(float(i)/float(processors)) * numSeqs);
459                                                 lines[i].end = int (sqrt(float(i+1)/float(processors)) * numSeqs);
460                                         }else{
461                                                 lines[i].start = int ((float(i)/float(processors)) * numSeqs);
462                                                 lines[i].end = int ((float(i+1)/float(processors)) * numSeqs);
463                                         }
464                                 }
465                                 
466                                 createProcesses(outputFile); 
467                         }
468                 #else
469                         //ifstream inFASTA;
470                         if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
471                         else { driver(0, numSeqs, outputFile, "square");  }
472                 #endif
473                 
474         #endif
475                         if (m->control_pressed) { outputTypes.clear();  delete distCalculator; m->mothurRemove(outputFile); return 0; }
476                         
477                         #ifdef USE_MPI
478                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
479                                                 
480                                 if (pid == 0) { //only one process should output to screen
481                         #endif
482                         
483                         ifstream fileHandle;
484                         fileHandle.open(outputFile.c_str());
485                         if(fileHandle) {
486                                 m->gobble(fileHandle);
487                                 if (fileHandle.eof()) { m->mothurOut(outputFile + " is blank. This can result if there are no distances below your cutoff.");  m->mothurOutEndLine(); }
488                         }
489                         
490                         if (compress) {
491                                 m->mothurOut("Compressing..."); m->mothurOutEndLine();
492                                 m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)"); m->mothurOutEndLine();
493                                 system(("gzip -v " + outputFile).c_str());
494                                 outputNames.push_back(outputFile + ".gz");
495                         }else { outputNames.push_back(outputFile); }
496                         
497                         #ifdef USE_MPI
498                                 }
499                         #endif
500                         
501                         m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
502                         
503                         if (m->control_pressed) { outputTypes.clear();  delete distCalculator; m->mothurRemove(outputFile); return 0; }
504                 }
505                         
506                 delete distCalculator;
507                 
508                 //set phylip file as new current phylipfile
509                 string current = "";
510                 itTypes = outputTypes.find("phylip");
511                 if (itTypes != outputTypes.end()) {
512                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
513                 }
514                 
515                 //set column file as new current columnfile
516                 itTypes = outputTypes.find("column");
517                 if (itTypes != outputTypes.end()) {
518                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
519                 }
520                 
521                 m->mothurOutEndLine();
522                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
523                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
524                 m->mothurOutEndLine();
525                 
526
527                 return 0;
528         }
529         catch(exception& e) {
530                 m->errorOut(e, "PairwiseSeqsCommand", "execute");
531                 exit(1);
532         }
533 }
534
535 /**************************************************************************************************/
536 void PairwiseSeqsCommand::createProcesses(string filename) {
537         try {
538 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
539                 int process = 1;
540                 processIDS.clear();
541                 
542                 //loop through and create all the processes you want
543                 while (process != processors) {
544                         int pid = fork();
545                         
546                         if (pid > 0) {
547                                 processIDS.push_back(pid); 
548                                 process++;
549                         }else if (pid == 0){
550                                 if (output != "square") {  driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", cutoff); }
551                                 else { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", "square"); }
552                                 exit(0);
553                         }else { 
554                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
555                                 for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; kill (temp, SIGINT); }
556                                 exit(0);
557                         }
558                 }
559                 
560                 //parent do my part
561                 if (output != "square") {  driver(lines[0].start, lines[0].end, filename, cutoff); }
562                 else { driver(lines[0].start, lines[0].end, filename, "square"); }
563
564                 
565                 //force parent to wait until all the processes are done
566                 for (int i=0;i<processIDS.size();i++) { 
567                         int temp = processIDS[i];
568                         wait(&temp);
569                 }
570                 
571                 //append and remove temp files
572                 for (int i=0;i<processIDS.size();i++) { 
573                         m->appendFiles((filename + toString(processIDS[i]) + ".temp"), filename);
574                         m->mothurRemove((filename + toString(processIDS[i]) + ".temp"));
575                 }
576 #endif
577         }
578         catch(exception& e) {
579                 m->errorOut(e, "PairwiseSeqsCommand", "createProcesses");
580                 exit(1);
581         }
582 }
583
584 /**************************************************************************************************/
585 /////// need to fix to work with calcs and sequencedb
586 int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, float cutoff){
587         try {
588
589                 int startTime = time(NULL);
590                 
591                 //column file
592                 ofstream outFile(dFileName.c_str(), ios::trunc);
593                 outFile.setf(ios::fixed, ios::showpoint);
594                 outFile << setprecision(4);
595                 
596                 if((output == "lt") && startLine == 0){ outFile << alignDB.getNumSeqs() << endl;        }
597                 
598                 for(int i=startLine;i<endLine;i++){
599                         if(output == "lt")      {       
600                                 string name = alignDB.get(i).getName();
601                                 if (name.length() < 10) { //pad with spaces to make compatible
602                                         while (name.length() < 10) {  name += " ";  }
603                                 }
604                                 outFile << name << '\t';        
605                         }
606                         
607                         for(int j=0;j<i;j++){
608                                 
609                                 if (m->control_pressed) { outFile.close(); return 0;  }
610                                 
611                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
612                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
613                                 }
614                                 
615                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
616                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
617                                 }
618                                 
619                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
620                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
621                                 
622                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
623                                 seqI.setAligned(alignment->getSeqAAln());
624                                 seqJ.setAligned(alignment->getSeqBAln());
625
626                                 
627                                 distCalculator->calcDist(seqI, seqJ);
628                                 double dist = distCalculator->getDist();
629                                 
630                                 if(dist <= cutoff){
631                                         if (output == "column") { outFile << alignDB.get(i).getName() << ' ' << alignDB.get(j).getName() << ' ' << dist << endl; }
632                                 }
633                                 if (output == "lt") {  outFile << dist << '\t'; }
634                         }
635                         
636                         if (output == "lt") { outFile << endl; }
637                         
638                         if(i % 100 == 0){
639                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
640                         }
641                         
642                 }
643                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
644                 
645                 outFile.close();
646                 
647                 return 1;
648         }
649         catch(exception& e) {
650                 m->errorOut(e, "PairwiseSeqsCommand", "driver");
651                 exit(1);
652         }
653 }
654 /**************************************************************************************************/
655 /////// need to fix to work with calcs and sequencedb
656 int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, string square){
657         try {
658
659                 int startTime = time(NULL);
660                 
661                 //column file
662                 ofstream outFile(dFileName.c_str(), ios::trunc);
663                 outFile.setf(ios::fixed, ios::showpoint);
664                 outFile << setprecision(4);
665                 
666                 if(startLine == 0){     outFile << alignDB.getNumSeqs() << endl;        }
667                 
668                 for(int i=startLine;i<endLine;i++){
669                                 
670                         string name = alignDB.get(i).getName();
671                         //pad with spaces to make compatible
672                         if (name.length() < 10) { while (name.length() < 10) {  name += " ";  } }
673                                 
674                         outFile << name << '\t';        
675                         
676                         for(int j=0;j<alignDB.getNumSeqs();j++){
677                                 
678                                 if (m->control_pressed) { outFile.close(); return 0;  }
679                                 
680                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
681                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
682                                 }
683                                 
684                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
685                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
686                                 }
687                                 
688                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
689                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
690                                 
691                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
692                                 seqI.setAligned(alignment->getSeqAAln());
693                                 seqJ.setAligned(alignment->getSeqBAln());
694                                 
695                                 distCalculator->calcDist(seqI, seqJ);
696                                 double dist = distCalculator->getDist();
697                                                                 
698                                 outFile << dist << '\t'; 
699                         }
700                         
701                         outFile << endl; 
702                         
703                         if(i % 100 == 0){
704                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
705                         }
706                         
707                 }
708                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
709                 
710                 outFile.close();
711                 
712                 return 1;
713         }
714         catch(exception& e) {
715                 m->errorOut(e, "PairwiseSeqsCommand", "driver");
716                 exit(1);
717         }
718 }
719 #ifdef USE_MPI
720 /**************************************************************************************************/
721 /////// need to fix to work with calcs and sequencedb
722 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, float cutoff){
723         try {
724                 MPI_Status status;
725                 int startTime = time(NULL);
726                 
727                 string outputString = "";
728                 
729                 for(int i=startLine;i<endLine;i++){
730         
731                         for(int j=0;j<i;j++){
732                                 
733                                 if (m->control_pressed) {  return 0;  }
734                                 
735                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
736                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
737                                 }
738                                 
739                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
740                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
741                                 }
742                                 
743                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
744                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
745                                 
746                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
747                                 seqI.setAligned(alignment->getSeqAAln());
748                                 seqJ.setAligned(alignment->getSeqBAln());
749                                 
750                                 distCalculator->calcDist(seqI, seqJ);
751                                 double dist = distCalculator->getDist();
752                                 
753                                 if(dist <= cutoff){
754                                          outputString += (alignDB.get(i).getName() + ' ' + alignDB.get(j).getName() + ' ' + toString(dist) + '\n'); 
755                                 }
756                         }
757                         
758                         if(i % 100 == 0){
759                                 //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
760                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
761                         }
762                         
763                          
764                         //send results to parent
765                         int length = outputString.length();
766
767                         char* buf = new char[length];
768                         memcpy(buf, outputString.c_str(), length);
769                         
770                         MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
771                         outputString = "";
772                         delete buf;
773                         
774                 }
775                 
776                 return 1;
777         }
778         catch(exception& e) {
779                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
780                 exit(1);
781         }
782 }
783 /**************************************************************************************************/
784 /////// need to fix to work with calcs and sequencedb
785 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size){
786         try {
787                 MPI_Status status;
788                 
789                 MPI_File outMPI;
790                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
791
792                 char filename[1024];
793                 strcpy(filename, file.c_str());
794
795                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
796
797                 
798                 
799                 string outputString = "";
800                 size = 0;
801                 
802                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
803                 
804                 for(int i=startLine;i<endLine;i++){
805                                 
806                         string name = alignDB.get(i).getName();
807                         if (name.length() < 10) { //pad with spaces to make compatible
808                                 while (name.length() < 10) {  name += " ";  }
809                         }
810                         outputString += name + "\t";    
811                         
812                         for(int j=0;j<i;j++){
813                                 
814                                 if (m->control_pressed) {  return 0;  }
815                                 
816                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
817                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
818                                 }
819                                 
820                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
821                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
822                                 }
823                                 
824                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
825                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
826                                 
827                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
828                                 seqI.setAligned(alignment->getSeqAAln());
829                                 seqJ.setAligned(alignment->getSeqBAln());
830                                 
831                                 distCalculator->calcDist(seqI, seqJ);
832                                 double dist = distCalculator->getDist();
833                                 
834                                 outputString += toString(dist) + "\t"; 
835                         }
836                         
837                         outputString += "\n"; 
838                         
839                         //send results to parent
840                         int length = outputString.length();
841                         char* buf = new char[length];
842                         memcpy(buf, outputString.c_str(), length);
843                         
844                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
845                         size += outputString.length();
846                         outputString = "";
847                         delete buf;
848                 }
849                 
850                 MPI_File_close(&outMPI);
851                 
852                 return 1;
853         }
854         catch(exception& e) {
855                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
856                 exit(1);
857         }
858 }
859 /**************************************************************************************************/
860 /////// need to fix to work with calcs and sequencedb
861 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size, string square){
862         try {
863                 MPI_Status status;
864                 
865                 MPI_File outMPI;
866                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
867
868                 char filename[1024];
869                 strcpy(filename, file.c_str());
870
871                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
872                 
873                 
874                 
875                 string outputString = "";
876                 size = 0;
877                 
878                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
879                 
880                 for(int i=startLine;i<endLine;i++){
881                                 
882                         string name = alignDB.get(i).getName();
883                         if (name.length() < 10) { //pad with spaces to make compatible
884                                 while (name.length() < 10) {  name += " ";  }
885                         }
886                         outputString += name + "\t";    
887                         
888                         for(int j=0;j<alignDB.getNumSeqs();j++){
889                                 
890                                 if (m->control_pressed) {  return 0;  }
891                                 
892                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
893                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
894                                 }
895                                 
896                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
897                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
898                                 }
899                                 
900                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
901                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
902                                 
903                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
904                                 seqI.setAligned(alignment->getSeqAAln());
905                                 seqJ.setAligned(alignment->getSeqBAln());
906                                 
907                                 distCalculator->calcDist(seqI, seqJ);
908                                 double dist = distCalculator->getDist();
909                                 
910                                 outputString += toString(dist) + "\t"; 
911                         }
912                         
913                         outputString += "\n"; 
914
915                         //send results to parent
916                         int length = outputString.length();
917                         char* buf = new char[length];
918                         memcpy(buf, outputString.c_str(), length);
919                         
920                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
921                         size += outputString.length();
922                         outputString = "";
923                         delete buf;
924                 }
925                 
926                 MPI_File_close(&outMPI);
927                 
928                 return 1;
929         }
930         catch(exception& e) {
931                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
932                 exit(1);
933         }
934 }
935 #endif
936 /**************************************************************************************************/
937