]> git.donarmstrong.com Git - mothur.git/blob - pairwiseseqscommand.cpp
added citation function to commands
[mothur.git] / pairwiseseqscommand.cpp
1 /*
2  *  pairwiseseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/20/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "pairwiseseqscommand.h"
11 #include "sequence.hpp"
12
13 #include "gotohoverlap.hpp"
14 #include "needlemanoverlap.hpp"
15 #include "blastalign.hpp"
16 #include "noalign.hpp"
17
18 #include "ignoregaps.h"
19 #include "eachgapdist.h"
20 #include "eachgapignore.h"
21 #include "onegapdist.h"
22 #include "onegapignore.h"
23
24
25 //**********************************************************************************************************************
26 vector<string> PairwiseSeqsCommand::setParameters(){    
27         try {
28                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
29                 CommandParameter palign("align", "Multiple", "needleman-gotoh-blast-noalign", "needleman", "", "", "",false,false); parameters.push_back(palign);
30                 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch);
31                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
32                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
33                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
34                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
35                 CommandParameter poutput("output", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(poutput);
36                 CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "",false,false); parameters.push_back(pcalc);
37                 CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pcountends);
38                 CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcompress);
39                 CommandParameter pcutoff("cutoff", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pcutoff);
40                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
41                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
42                 
43                 vector<string> myArray;
44                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "PairwiseSeqsCommand", "setParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 string PairwiseSeqsCommand::getHelpString(){    
54         try {
55                 string helpString = "";
56                 helpString += "The pairwise.seqs command reads a fasta file and creates distance matrix.\n";
57                 helpString += "The pairwise.seqs command parameters are fasta, align, match, mismatch, gapopen, gapextend, calc, output, cutoff and processors.\n";
58                 helpString += "The fasta parameter is required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
59                 helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n";
60                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
61                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
62                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
63                 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
64                 helpString += "The calc parameter allows you to specify the method of calculating the distances.  Your options are: nogaps, onegap or eachgap. The default is onegap.\n";
65                 helpString += "The countends parameter allows you to specify whether to include terminal gaps in distance.  Your options are: T or F. The default is T.\n";
66                 helpString += "The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0.\n";
67                 helpString += "The output parameter allows you to specify format of your distance matrix. Options are column, lt, and square. The default is column.\n";
68                 helpString += "The compress parameter allows you to indicate that you want the resulting distance file compressed.  The default is false.\n";
69                 helpString += "The pairwise.seqs command should be in the following format: \n";
70                 helpString += "pairwise.seqs(fasta=yourfastaFile, align=yourAlignmentMethod) \n";
71                 helpString += "Example pairwise.seqs(fasta=candidate.fasta, align=blast)\n";
72                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
73                 return helpString;
74         }
75         catch(exception& e) {
76                 m->errorOut(e, "PairwiseSeqsCommand", "getHelpString");
77                 exit(1);
78         }
79 }
80
81 //**********************************************************************************************************************
82 PairwiseSeqsCommand::PairwiseSeqsCommand(){     
83         try {
84                 abort = true; calledHelp = true; 
85                 setParameters();
86                 vector<string> tempOutNames;
87                 outputTypes["phylip"] = tempOutNames;
88                 outputTypes["column"] = tempOutNames;
89         }
90         catch(exception& e) {
91                 m->errorOut(e, "PairwiseSeqsCommand", "PairwiseSeqsCommand");
92                 exit(1);
93         }
94 }
95 //**********************************************************************************************************************
96 PairwiseSeqsCommand::PairwiseSeqsCommand(string option)  {
97         try {
98                 abort = false; calledHelp = false;   
99         
100                 //allow user to run help
101                 if(option == "help") { help(); abort = true; calledHelp = true; }
102                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
103                 
104                 else {
105                         vector<string> myArray = setParameters();
106                         
107                         OptionParser parser(option);
108                         map<string, string> parameters = parser.getParameters(); 
109                         
110                         ValidParameters validParameter("pairwise.seqs");
111                         map<string, string>::iterator it;
112                         
113                         //check to make sure all parameters are valid for command
114                         for (it = parameters.begin(); it != parameters.end(); it++) { 
115                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
116                         }
117                         
118                         //initialize outputTypes
119                         vector<string> tempOutNames;
120                         outputTypes["phylip"] = tempOutNames;
121                         outputTypes["column"] = tempOutNames;
122                         
123                         //if the user changes the output directory command factory will send this info to us in the output parameter 
124                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
125                         
126
127                         //if the user changes the input directory command factory will send this info to us in the output parameter 
128                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
129                         
130                         if (inputDir == "not found"){   inputDir = "";          }
131
132                         fastaFileName = validParameter.validFile(parameters, "fasta", false);
133                         if (fastaFileName == "not found") {                             
134                                 //if there is a current fasta file, use it
135                                 string filename = m->getFastaFile(); 
136                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
137                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
138                         }
139                         else { 
140                                 m->splitAtDash(fastaFileName, fastaFileNames);
141                                 
142                                 //go through files and make sure they are good, if not, then disregard them
143                                 for (int i = 0; i < fastaFileNames.size(); i++) {
144                                         
145                                         bool ignore = false;
146                                         if (fastaFileNames[i] == "current") { 
147                                                 fastaFileNames[i] = m->getFastaFile(); 
148                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
149                                                 else {  
150                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
151                                                         //erase from file list
152                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
153                                                         i--;
154                                                 }
155                                         }
156                                         
157                                         if (!ignore) {
158                                         
159                                                 if (inputDir != "") {
160                                                         string path = m->hasPath(fastaFileNames[i]);
161                                                         //if the user has not given a path then, add inputdir. else leave path alone.
162                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
163                                                 }
164                 
165                                                 int ableToOpen;
166                                                 ifstream in;
167
168                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
169                                         
170                                                 //if you can't open it, try default location
171                                                 if (ableToOpen == 1) {
172                                                         if (m->getDefaultPath() != "") { //default path is set
173                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
174                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
175                                                                 ifstream in2;
176                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
177                                                                 in2.close();
178                                                                 fastaFileNames[i] = tryPath;
179                                                         }
180                                                 }
181                                                 
182                                                 //if you can't open it, try output location
183                                                 if (ableToOpen == 1) {
184                                                         if (m->getOutputDir() != "") { //default path is set
185                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
186                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
187                                                                 ifstream in2;
188                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
189                                                                 in2.close();
190                                                                 fastaFileNames[i] = tryPath;
191                                                         }
192                                                 }
193                                                 
194                                                 in.close();                                     
195
196                                                 if (ableToOpen == 1) { 
197                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
198                                                         //erase from file list
199                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
200                                                         i--;
201                                                 }
202                                         }
203                                 }
204                                 
205                                 //make sure there is at least one valid file left
206                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
207                         }
208                 
209                         //check for optional parameter and set defaults
210                         // ...at some point should added some additional type checking...
211                         string temp;
212                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
213                         convert(temp, match);  
214                         
215                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
216                         convert(temp, misMatch);  
217                         
218                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
219                         convert(temp, gapOpen);  
220                         
221                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
222                         convert(temp, gapExtend); 
223                         
224                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
225                         m->setProcessors(temp);
226                         convert(temp, processors);
227                         
228                         temp = validParameter.validFile(parameters, "cutoff", false);           if(temp == "not found"){        temp = "1.0"; }
229                         convert(temp, cutoff); 
230                         
231                         temp = validParameter.validFile(parameters, "countends", false);        if(temp == "not found"){        temp = "T";     }
232                         countends = m->isTrue(temp); 
233                         
234                         temp = validParameter.validFile(parameters, "compress", false);         if(temp == "not found"){  temp = "F"; }
235                         compress = m->isTrue(temp); 
236                         
237                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
238                         
239                         output = validParameter.validFile(parameters, "output", false);         if(output == "not found"){      output = "column"; }
240                         if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; }
241                         
242                         calc = validParameter.validFile(parameters, "calc", false);                     
243                         if (calc == "not found") { calc = "onegap";  }
244                         else { 
245                                  if (calc == "default")  {  calc = "onegap";  }
246                         }
247                         m->splitAtDash(calc, Estimators);
248                         
249                         ValidCalculators validCalculator;
250                         if (countends) {
251                                 for (int i=0; i<Estimators.size(); i++) {
252                                         if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
253                                                 if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
254                                                 else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
255                                                 else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
256                                         }
257                                 }
258                         }else {
259                                 for (int i=0; i<Estimators.size(); i++) {
260                                         if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
261                                                 if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
262                                                 else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
263                                                 else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
264                                         }
265                                 }
266                         }
267                 }
268                 
269         }
270         catch(exception& e) {
271                 m->errorOut(e, "PairwiseSeqsCommand", "PairwiseSeqsCommand");
272                 exit(1);
273         }
274 }
275 //**********************************************************************************************************************
276
277 int PairwiseSeqsCommand::execute(){
278         try {
279                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
280                 
281                 int longestBase = 2000; //will need to update this in driver if we find sequences with more bases.  hardcoded so we don't have the pre-read user fasta file.
282                 
283                 if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
284                 else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
285                 else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
286                 else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
287                 else {
288                         m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
289                         m->mothurOutEndLine();
290                         alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
291                 }
292                 
293                 cutoff += 0.005;
294                 
295                 for (int s = 0; s < fastaFileNames.size(); s++) {
296                         if (m->control_pressed) { outputTypes.clear(); return 0; }
297                         
298                         m->mothurOut("Processing sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
299                         
300                         if (outputDir == "") {  outputDir += m->hasPath(fastaFileNames[s]); }
301                         
302                         ifstream inFASTA;
303                         m->openInputFile(fastaFileNames[s], inFASTA);
304                         alignDB = SequenceDB(inFASTA); 
305                         inFASTA.close();
306                         
307                         int numSeqs = alignDB.getNumSeqs();
308                         int startTime = time(NULL);
309                         string outputFile = "";
310                                 
311                         if (output == "lt") { //does the user want lower triangle phylip formatted file 
312                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "phylip.dist";
313                                 remove(outputFile.c_str()); outputTypes["phylip"].push_back(outputFile);
314                         }else if (output == "column") { //user wants column format
315                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "dist";
316                                 outputTypes["column"].push_back(outputFile);
317                                 remove(outputFile.c_str());
318                         }else { //assume square
319                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "square.dist";
320                                 remove(outputFile.c_str());
321                                 outputTypes["phylip"].push_back(outputFile);
322                         }
323                         
324                         #ifdef USE_MPI
325                 
326                         int pid, start, end; 
327                         int tag = 2001;
328                                         
329                         MPI_Status status; 
330                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
331                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
332                         
333                         //each process gets where it should start and stop in the file
334                         if (output != "square") {
335                                 start = int (sqrt(float(pid)/float(processors)) * numSeqs);
336                                 end = int (sqrt(float(pid+1)/float(processors)) * numSeqs);
337                         }else{
338                                 start = int ((float(pid)/float(processors)) * numSeqs);
339                                 end = int ((float(pid+1)/float(processors)) * numSeqs);
340                         }
341                         
342                         if (output == "column") {
343                                 MPI_File outMPI;
344                                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
345
346                         char filename[1024];
347                                 strcpy(filename, outputFile.c_str());
348                                 
349                                 MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI);
350
351                                 if (pid == 0) { //you are the root process 
352                                 
353                                         //do your part
354                                         string outputMyPart;
355                                         
356                                         driverMPI(start, end, outMPI, cutoff); 
357                                         
358                                         if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI);  remove(outputFile.c_str()); delete distCalculator;  return 0; }
359                                 
360                                         //wait on chidren
361                                         for(int i = 1; i < processors; i++) { 
362                                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   remove(outputFile.c_str()); delete distCalculator;  return 0; }
363                                                 
364                                                 char buf[5];
365                                                 MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
366                                         }
367                                 }else { //you are a child process
368                                         //do your part
369                                         driverMPI(start, end, outMPI, cutoff); 
370                                         
371                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  remove(outputFile.c_str()); delete distCalculator;  return 0; }
372                                 
373                                         char buf[5];
374                                         strcpy(buf, "done"); 
375                                         //tell parent you are done.
376                                         MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
377                                 }
378                                 
379                                 MPI_File_close(&outMPI);
380                                 
381                         }else { //lower triangle format
382                                 if (pid == 0) { //you are the root process 
383                                 
384                                         //do your part
385                                         string outputMyPart;
386                                         unsigned long int mySize;
387                                         
388                                         if (output != "square"){ driverMPI(start, end, outputFile, mySize); }
389                                         else { driverMPI(start, end, outputFile, mySize, output); }
390                 
391                                         if (m->control_pressed) {  outputTypes.clear();   remove(outputFile.c_str()); delete distCalculator;  return 0; }
392                                         
393                                         int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; //
394                                         MPI_File outMPI;
395                                         MPI_File inMPI;
396
397                                         char filename[1024];
398                                         strcpy(filename, outputFile.c_str());
399
400                                         MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
401
402                                         //wait on chidren
403                                         for(int b = 1; b < processors; b++) { 
404                                                 unsigned long int fileSize;
405                                                 
406                                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  remove(outputFile.c_str());  delete distCalculator;  return 0; }
407                                                 
408                                                 MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); 
409                                                 
410                                                 string outTemp = outputFile + toString(b) + ".temp";
411
412                                                 char* buf = new char[outTemp.length()];
413                                                 memcpy(buf, outTemp.c_str(), outTemp.length());
414                                                 
415                                                 MPI_File_open(MPI_COMM_SELF, buf, MPI_MODE_DELETE_ON_CLOSE|MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);
416                                                 delete buf;
417
418                                                 int count = 0;
419                                                 while (count < fileSize) { 
420                                                         char buf2[1];
421                                                         MPI_File_read(inMPI, buf2, 1, MPI_CHAR, &status);
422                                                         MPI_File_write(outMPI, buf2, 1, MPI_CHAR, &status);
423                                                         count += 1;
424                                                 }
425                                                 
426                                                 MPI_File_close(&inMPI); //deleted on close
427                                         }
428                                         
429                                         MPI_File_close(&outMPI);
430                                 }else { //you are a child process
431                                         //do your part
432                                         unsigned long int size;
433                                         if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); }
434                                         else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); }
435                                         
436                                         if (m->control_pressed) { delete distCalculator;  return 0; }
437                                 
438                                         //tell parent you are done.
439                                         MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD);
440                                 }
441                         }
442                         MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
443         #else           
444                                         
445                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
446                         //if you don't need to fork anything
447                         if(processors == 1){
448                                 if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
449                                 else { driver(0, numSeqs, outputFile, "square");  }
450                         }else{ //you have multiple processors
451                                 
452                                 for (int i = 0; i < processors; i++) {
453                                         distlinePair tempLine;
454                                         lines.push_back(tempLine);
455                                         if (output != "square") {
456                                                 lines[i].start = int (sqrt(float(i)/float(processors)) * numSeqs);
457                                                 lines[i].end = int (sqrt(float(i+1)/float(processors)) * numSeqs);
458                                         }else{
459                                                 lines[i].start = int ((float(i)/float(processors)) * numSeqs);
460                                                 lines[i].end = int ((float(i+1)/float(processors)) * numSeqs);
461                                         }
462                                 }
463                                 
464                                 createProcesses(outputFile); 
465                         }
466                 #else
467                         //ifstream inFASTA;
468                         if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
469                         else { driver(0, numSeqs, outputFile, "square");  }
470                 #endif
471                 
472         #endif
473                         if (m->control_pressed) { outputTypes.clear();  delete distCalculator; remove(outputFile.c_str()); return 0; }
474                         
475                         #ifdef USE_MPI
476                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
477                                                 
478                                 if (pid == 0) { //only one process should output to screen
479                         #endif
480                         
481                         ifstream fileHandle;
482                         fileHandle.open(outputFile.c_str());
483                         if(fileHandle) {
484                                 m->gobble(fileHandle);
485                                 if (fileHandle.eof()) { m->mothurOut(outputFile + " is blank. This can result if there are no distances below your cutoff.");  m->mothurOutEndLine(); }
486                         }
487                         
488                         if (compress) {
489                                 m->mothurOut("Compressing..."); m->mothurOutEndLine();
490                                 m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)"); m->mothurOutEndLine();
491                                 system(("gzip -v " + outputFile).c_str());
492                                 outputNames.push_back(outputFile + ".gz");
493                         }else { outputNames.push_back(outputFile); }
494                         
495                         #ifdef USE_MPI
496                                 }
497                         #endif
498                         
499                         m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
500                         
501                         if (m->control_pressed) { outputTypes.clear();  delete distCalculator; remove(outputFile.c_str()); return 0; }
502                 }
503                         
504                 delete distCalculator;
505                 
506                 //set phylip file as new current phylipfile
507                 string current = "";
508                 itTypes = outputTypes.find("phylip");
509                 if (itTypes != outputTypes.end()) {
510                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
511                 }
512                 
513                 //set column file as new current columnfile
514                 itTypes = outputTypes.find("column");
515                 if (itTypes != outputTypes.end()) {
516                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
517                 }
518                 
519                 m->mothurOutEndLine();
520                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
521                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
522                 m->mothurOutEndLine();
523                 
524
525                 return 0;
526         }
527         catch(exception& e) {
528                 m->errorOut(e, "PairwiseSeqsCommand", "execute");
529                 exit(1);
530         }
531 }
532
533 /**************************************************************************************************/
534 void PairwiseSeqsCommand::createProcesses(string filename) {
535         try {
536 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
537                 int process = 1;
538                 processIDS.clear();
539                 
540                 //loop through and create all the processes you want
541                 while (process != processors) {
542                         int pid = fork();
543                         
544                         if (pid > 0) {
545                                 processIDS.push_back(pid); 
546                                 process++;
547                         }else if (pid == 0){
548                                 if (output != "square") {  driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", cutoff); }
549                                 else { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", "square"); }
550                                 exit(0);
551                         }else { 
552                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
553                                 for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; kill (temp, SIGINT); }
554                                 exit(0);
555                         }
556                 }
557                 
558                 //parent do my part
559                 if (output != "square") {  driver(lines[0].start, lines[0].end, filename, cutoff); }
560                 else { driver(lines[0].start, lines[0].end, filename, "square"); }
561
562                 
563                 //force parent to wait until all the processes are done
564                 for (int i=0;i<processIDS.size();i++) { 
565                         int temp = processIDS[i];
566                         wait(&temp);
567                 }
568                 
569                 //append and remove temp files
570                 for (int i=0;i<processIDS.size();i++) { 
571                         m->appendFiles((filename + toString(processIDS[i]) + ".temp"), filename);
572                         remove((filename + toString(processIDS[i]) + ".temp").c_str());
573                 }
574 #endif
575         }
576         catch(exception& e) {
577                 m->errorOut(e, "PairwiseSeqsCommand", "createProcesses");
578                 exit(1);
579         }
580 }
581
582 /**************************************************************************************************/
583 /////// need to fix to work with calcs and sequencedb
584 int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, float cutoff){
585         try {
586
587                 int startTime = time(NULL);
588                 
589                 //column file
590                 ofstream outFile(dFileName.c_str(), ios::trunc);
591                 outFile.setf(ios::fixed, ios::showpoint);
592                 outFile << setprecision(4);
593                 
594                 if((output == "lt") && startLine == 0){ outFile << alignDB.getNumSeqs() << endl;        }
595                 
596                 for(int i=startLine;i<endLine;i++){
597                         if(output == "lt")      {       
598                                 string name = alignDB.get(i).getName();
599                                 if (name.length() < 10) { //pad with spaces to make compatible
600                                         while (name.length() < 10) {  name += " ";  }
601                                 }
602                                 outFile << name << '\t';        
603                         }
604                         
605                         for(int j=0;j<i;j++){
606                                 
607                                 if (m->control_pressed) { outFile.close(); return 0;  }
608                                 
609                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
610                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
611                                 }
612                                 
613                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
614                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
615                                 }
616                                 
617                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
618                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
619                                 
620                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
621                                 seqI.setAligned(alignment->getSeqAAln());
622                                 seqJ.setAligned(alignment->getSeqBAln());
623
624                                 
625                                 distCalculator->calcDist(seqI, seqJ);
626                                 double dist = distCalculator->getDist();
627                                 
628                                 if(dist <= cutoff){
629                                         if (output == "column") { outFile << alignDB.get(i).getName() << ' ' << alignDB.get(j).getName() << ' ' << dist << endl; }
630                                 }
631                                 if (output == "lt") {  outFile << dist << '\t'; }
632                         }
633                         
634                         if (output == "lt") { outFile << endl; }
635                         
636                         if(i % 100 == 0){
637                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
638                         }
639                         
640                 }
641                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
642                 
643                 outFile.close();
644                 
645                 return 1;
646         }
647         catch(exception& e) {
648                 m->errorOut(e, "PairwiseSeqsCommand", "driver");
649                 exit(1);
650         }
651 }
652 /**************************************************************************************************/
653 /////// need to fix to work with calcs and sequencedb
654 int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, string square){
655         try {
656
657                 int startTime = time(NULL);
658                 
659                 //column file
660                 ofstream outFile(dFileName.c_str(), ios::trunc);
661                 outFile.setf(ios::fixed, ios::showpoint);
662                 outFile << setprecision(4);
663                 
664                 if(startLine == 0){     outFile << alignDB.getNumSeqs() << endl;        }
665                 
666                 for(int i=startLine;i<endLine;i++){
667                                 
668                         string name = alignDB.get(i).getName();
669                         //pad with spaces to make compatible
670                         if (name.length() < 10) { while (name.length() < 10) {  name += " ";  } }
671                                 
672                         outFile << name << '\t';        
673                         
674                         for(int j=0;j<alignDB.getNumSeqs();j++){
675                                 
676                                 if (m->control_pressed) { outFile.close(); return 0;  }
677                                 
678                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
679                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
680                                 }
681                                 
682                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
683                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
684                                 }
685                                 
686                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
687                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
688                                 
689                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
690                                 seqI.setAligned(alignment->getSeqAAln());
691                                 seqJ.setAligned(alignment->getSeqBAln());
692                                 
693                                 distCalculator->calcDist(seqI, seqJ);
694                                 double dist = distCalculator->getDist();
695                                                                 
696                                 outFile << dist << '\t'; 
697                         }
698                         
699                         outFile << endl; 
700                         
701                         if(i % 100 == 0){
702                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
703                         }
704                         
705                 }
706                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
707                 
708                 outFile.close();
709                 
710                 return 1;
711         }
712         catch(exception& e) {
713                 m->errorOut(e, "PairwiseSeqsCommand", "driver");
714                 exit(1);
715         }
716 }
717 #ifdef USE_MPI
718 /**************************************************************************************************/
719 /////// need to fix to work with calcs and sequencedb
720 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, float cutoff){
721         try {
722                 MPI_Status status;
723                 int startTime = time(NULL);
724                 
725                 string outputString = "";
726                 
727                 for(int i=startLine;i<endLine;i++){
728         
729                         for(int j=0;j<i;j++){
730                                 
731                                 if (m->control_pressed) {  return 0;  }
732                                 
733                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
734                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
735                                 }
736                                 
737                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
738                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
739                                 }
740                                 
741                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
742                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
743                                 
744                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
745                                 seqI.setAligned(alignment->getSeqAAln());
746                                 seqJ.setAligned(alignment->getSeqBAln());
747                                 
748                                 distCalculator->calcDist(seqI, seqJ);
749                                 double dist = distCalculator->getDist();
750                                 
751                                 if(dist <= cutoff){
752                                          outputString += (alignDB.get(i).getName() + ' ' + alignDB.get(j).getName() + ' ' + toString(dist) + '\n'); 
753                                 }
754                         }
755                         
756                         if(i % 100 == 0){
757                                 //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
758                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
759                         }
760                         
761                          
762                         //send results to parent
763                         int length = outputString.length();
764
765                         char* buf = new char[length];
766                         memcpy(buf, outputString.c_str(), length);
767                         
768                         MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
769                         outputString = "";
770                         delete buf;
771                         
772                 }
773                 
774                 return 1;
775         }
776         catch(exception& e) {
777                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
778                 exit(1);
779         }
780 }
781 /**************************************************************************************************/
782 /////// need to fix to work with calcs and sequencedb
783 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size){
784         try {
785                 MPI_Status status;
786                 
787                 MPI_File outMPI;
788                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
789
790                 char filename[1024];
791                 strcpy(filename, file.c_str());
792
793                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
794
795                 
796                 
797                 string outputString = "";
798                 size = 0;
799                 
800                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
801                 
802                 for(int i=startLine;i<endLine;i++){
803                                 
804                         string name = alignDB.get(i).getName();
805                         if (name.length() < 10) { //pad with spaces to make compatible
806                                 while (name.length() < 10) {  name += " ";  }
807                         }
808                         outputString += name + "\t";    
809                         
810                         for(int j=0;j<i;j++){
811                                 
812                                 if (m->control_pressed) {  return 0;  }
813                                 
814                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
815                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
816                                 }
817                                 
818                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
819                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
820                                 }
821                                 
822                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
823                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
824                                 
825                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
826                                 seqI.setAligned(alignment->getSeqAAln());
827                                 seqJ.setAligned(alignment->getSeqBAln());
828                                 
829                                 distCalculator->calcDist(seqI, seqJ);
830                                 double dist = distCalculator->getDist();
831                                 
832                                 outputString += toString(dist) + "\t"; 
833                         }
834                         
835                         outputString += "\n"; 
836                         
837                         //send results to parent
838                         int length = outputString.length();
839                         char* buf = new char[length];
840                         memcpy(buf, outputString.c_str(), length);
841                         
842                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
843                         size += outputString.length();
844                         outputString = "";
845                         delete buf;
846                 }
847                 
848                 MPI_File_close(&outMPI);
849                 
850                 return 1;
851         }
852         catch(exception& e) {
853                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
854                 exit(1);
855         }
856 }
857 /**************************************************************************************************/
858 /////// need to fix to work with calcs and sequencedb
859 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size, string square){
860         try {
861                 MPI_Status status;
862                 
863                 MPI_File outMPI;
864                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
865
866                 char filename[1024];
867                 strcpy(filename, file.c_str());
868
869                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
870                 
871                 
872                 
873                 string outputString = "";
874                 size = 0;
875                 
876                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
877                 
878                 for(int i=startLine;i<endLine;i++){
879                                 
880                         string name = alignDB.get(i).getName();
881                         if (name.length() < 10) { //pad with spaces to make compatible
882                                 while (name.length() < 10) {  name += " ";  }
883                         }
884                         outputString += name + "\t";    
885                         
886                         for(int j=0;j<alignDB.getNumSeqs();j++){
887                                 
888                                 if (m->control_pressed) {  return 0;  }
889                                 
890                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
891                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
892                                 }
893                                 
894                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
895                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
896                                 }
897                                 
898                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
899                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
900                                 
901                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
902                                 seqI.setAligned(alignment->getSeqAAln());
903                                 seqJ.setAligned(alignment->getSeqBAln());
904                                 
905                                 distCalculator->calcDist(seqI, seqJ);
906                                 double dist = distCalculator->getDist();
907                                 
908                                 outputString += toString(dist) + "\t"; 
909                         }
910                         
911                         outputString += "\n"; 
912
913                         //send results to parent
914                         int length = outputString.length();
915                         char* buf = new char[length];
916                         memcpy(buf, outputString.c_str(), length);
917                         
918                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
919                         size += outputString.length();
920                         outputString = "";
921                         delete buf;
922                 }
923                 
924                 MPI_File_close(&outMPI);
925                 
926                 return 1;
927         }
928         catch(exception& e) {
929                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
930                 exit(1);
931         }
932 }
933 #endif
934 /**************************************************************************************************/
935