]> git.donarmstrong.com Git - mothur.git/blob - distancecommand.cpp
Merge remote-tracking branch 'origin/master'
[mothur.git] / distancecommand.cpp
1 /*
2  *  distancecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/7/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "distancecommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> DistanceCommand::setParameters(){        
14         try {
15                 CommandParameter pcolumn("column", "InputTypes", "", "", "none", "none", "OldFastaColumn","column",false,false); parameters.push_back(pcolumn);
16                 CommandParameter poldfasta("oldfasta", "InputTypes", "", "", "none", "none", "OldFastaColumn","",false,false); parameters.push_back(poldfasta);
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","phylip-column",false,true, true); parameters.push_back(pfasta);
18                 CommandParameter poutput("output", "Multiple", "column-lt-square-phylip", "column", "", "", "","phylip-column",false,false, true); parameters.push_back(poutput);
19                 CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "","",false,false); parameters.push_back(pcalc);
20                 CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pcountends);
21                 CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcompress);
22                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false, true); parameters.push_back(pprocessors);
23                 CommandParameter pcutoff("cutoff", "Number", "", "1.0", "", "", "","",false,false, true); parameters.push_back(pcutoff);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "DistanceCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string DistanceCommand::getHelpString(){        
38         try {
39                 string helpString = "";
40                 helpString += "The dist.seqs command reads a file containing sequences and creates a distance file.\n";
41                 helpString += "The dist.seqs command parameters are fasta, oldfasta, column, calc, countends, output, compress, cutoff and processors.  \n";
42                 helpString += "The fasta parameter is required, unless you have a valid current fasta file.\n";
43                 helpString += "The oldfasta and column parameters allow you to append the distances calculated to the column file.\n";
44                 helpString += "The calc parameter allows you to specify the method of calculating the distances.  Your options are: nogaps, onegap or eachgap. The default is onegap.\n";
45                 helpString += "The countends parameter allows you to specify whether to include terminal gaps in distance.  Your options are: T or F. The default is T.\n";
46                 helpString += "The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0.\n";
47                 helpString += "The output parameter allows you to specify format of your distance matrix. Options are column, lt, and square. The default is column.\n";
48                 helpString += "The processors parameter allows you to specify number of processors to use.  The default is 1.\n";
49                 helpString += "The compress parameter allows you to indicate that you want the resulting distance file compressed.  The default is false.\n";
50                 helpString += "The dist.seqs command should be in the following format: \n";
51                 helpString += "dist.seqs(fasta=yourFastaFile, calc=yourCalc, countends=yourEnds, cutoff= yourCutOff, processors=yourProcessors) \n";
52                 helpString += "Example dist.seqs(fasta=amazon.fasta, calc=eachgap, countends=F, cutoff= 2.0, processors=3).\n";
53                 helpString += "Note: No spaces between parameter labels (i.e. calc), '=' and parameters (i.e.yourCalc).\n";
54                 return helpString;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "DistanceCommand", "getHelpString");
58                 exit(1);
59         }
60 }
61 //**********************************************************************************************************************
62 string DistanceCommand::getOutputPattern(string type) {
63     try {
64         string pattern = "";
65         
66         if (type == "phylip") {  pattern = "[filename],[outputtag],dist"; } 
67         else if (type == "column") { pattern = "[filename],dist"; }
68         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
69         
70         return pattern;
71     }
72     catch(exception& e) {
73         m->errorOut(e, "DistanceCommand", "getOutputPattern");
74         exit(1);
75     }
76 }
77 //**********************************************************************************************************************
78 DistanceCommand::DistanceCommand(){     
79         try {
80                 abort = true; calledHelp = true; 
81                 setParameters();
82                 vector<string> tempOutNames;
83                 outputTypes["phylip"] = tempOutNames;
84                 outputTypes["column"] = tempOutNames;
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "DistanceCommand", "DistanceCommand");
88                 exit(1);
89         }
90 }
91 //**********************************************************************************************************************
92 DistanceCommand::DistanceCommand(string option) {
93         try {
94                 abort = false; calledHelp = false;   
95                 Estimators.clear();
96                                 
97                 //allow user to run help
98                 if(option == "help") { help(); abort = true; calledHelp = true; }
99                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
100                 
101                 else {
102                         vector<string> myArray = setParameters();
103                         
104                         OptionParser parser(option);
105                         map<string, string> parameters = parser.getParameters();
106                         
107                         ValidParameters validParameter("dist.seqs");
108                         map<string, string>::iterator it2;
109                 
110                         //check to make sure all parameters are valid for command
111                         for (it2 = parameters.begin(); it2 != parameters.end(); it2++) { 
112                                 if (validParameter.isValidParameter(it2->first, myArray, it2->second) != true) {  abort = true;  }
113                         }
114                         
115                         //initialize outputTypes
116                         vector<string> tempOutNames;
117                         outputTypes["phylip"] = tempOutNames;
118                         outputTypes["column"] = tempOutNames;
119                 
120                         //if the user changes the input directory command factory will send this info to us in the output parameter 
121                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
122                         if (inputDir == "not found"){   inputDir = "";          }
123                         else {
124                                 string path;
125                                 it2 = parameters.find("fasta");
126                                 //user has given a template file
127                                 if(it2 != parameters.end()){ 
128                                         path = m->hasPath(it2->second);
129                                         //if the user has not given a path then, add inputdir. else leave path alone.
130                                         if (path == "") {       parameters["fasta"] = inputDir + it2->second;           }
131                                 }
132                                 
133                                 it2 = parameters.find("oldfasta");
134                                 //user has given a template file
135                                 if(it2 != parameters.end()){ 
136                                         path = m->hasPath(it2->second);
137                                         //if the user has not given a path then, add inputdir. else leave path alone.
138                                         if (path == "") {       parameters["oldfasta"] = inputDir + it2->second;                }
139                                 }
140                                 
141                                 it2 = parameters.find("column");
142                                 //user has given a template file
143                                 if(it2 != parameters.end()){ 
144                                         path = m->hasPath(it2->second);
145                                         //if the user has not given a path then, add inputdir. else leave path alone.
146                                         if (path == "") {       parameters["column"] = inputDir + it2->second;          }
147                                 }
148                         }
149
150                         //check for required parameters
151                         fastafile = validParameter.validFile(parameters, "fasta", true);
152                         if (fastafile == "not found") {                                 
153                                 fastafile = m->getFastaFile(); 
154                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); 
155                                         ifstream inFASTA;
156                                         m->openInputFile(fastafile, inFASTA);
157                                         alignDB = SequenceDB(inFASTA); 
158                                         inFASTA.close();
159                                 }else {         m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
160                         }else if (fastafile == "not open") { abort = true; }    
161                         else{
162                                 ifstream inFASTA;
163                                 m->openInputFile(fastafile, inFASTA);
164                                 alignDB = SequenceDB(inFASTA); 
165                                 inFASTA.close();
166                                 m->setFastaFile(fastafile);
167                         }
168                         
169                         oldfastafile = validParameter.validFile(parameters, "oldfasta", true);
170                         if (oldfastafile == "not found") { oldfastafile = ""; }
171                         else if (oldfastafile == "not open") { abort = true; }  
172                         
173                         column = validParameter.validFile(parameters, "column", true);
174                         if (column == "not found") { column = ""; }
175                         else if (column == "not open") { abort = true; }        
176                         else { m->setColumnFile(column); }
177                         
178                         //if the user changes the output directory command factory will send this info to us in the output parameter 
179                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
180                                 outputDir = ""; 
181                                 outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it       
182                         }
183
184                         //check for optional parameter and set defaults
185                         // ...at some point should added some additional type checking...
186                         calc = validParameter.validFile(parameters, "calc", false);                     
187                         if (calc == "not found") { calc = "onegap";  }
188                         else { 
189                                  if (calc == "default")  {  calc = "onegap";  }
190                         }
191                         m->splitAtDash(calc, Estimators);
192
193                         string temp;
194                         temp = validParameter.validFile(parameters, "countends", false);        if(temp == "not found"){        temp = "T";     }
195                         convert(temp, countends); 
196                         
197                         temp = validParameter.validFile(parameters, "cutoff", false);           if(temp == "not found"){        temp = "1.0"; }
198                         m->mothurConvert(temp, cutoff); 
199                         
200                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
201                         m->setProcessors(temp);
202                         m->mothurConvert(temp, processors);
203                         
204                         temp = validParameter.validFile(parameters, "compress", false);         if(temp == "not found"){  temp = "F"; }
205                         convert(temp, compress);
206
207                         output = validParameter.validFile(parameters, "output", false);         if(output == "not found"){      output = "column"; }
208             if (output == "phylip") { output = "lt";  }
209                         
210                         if (((column != "") && (oldfastafile == "")) || ((column == "") && (oldfastafile != ""))) { m->mothurOut("If you provide column or oldfasta, you must provide both."); m->mothurOutEndLine(); abort=true; }
211                         
212                         if ((column != "") && (oldfastafile != "") && (output != "column")) { m->mothurOut("You have provided column and oldfasta, indicating you want to append distances to your column file. Your output must be in column format to do so."); m->mothurOutEndLine(); abort=true; }
213                         
214                         if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; }
215
216                 }
217                                 
218         }
219         catch(exception& e) {
220                 m->errorOut(e, "DistanceCommand", "DistanceCommand");
221                 exit(1);
222         }
223 }
224 //**********************************************************************************************************************
225
226 int DistanceCommand::execute(){
227         try {
228                 
229                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
230                 
231                 int startTime = time(NULL);
232                 
233                 //save number of new sequence
234                 numNewFasta = alignDB.getNumSeqs();
235                 
236                 //sanity check the oldfasta and column file as well as add oldfasta sequences to alignDB
237                 if ((oldfastafile != "") && (column != ""))  {  if (!(sanityCheck())) { return 0; }  }
238                 
239                 if (m->control_pressed) { return 0; }
240                 
241                 int numSeqs = alignDB.getNumSeqs();
242                 cutoff += 0.005;
243                 
244                 if (!alignDB.sameLength()) {  m->mothurOut("[ERROR]: your sequences are not the same length, aborting."); m->mothurOutEndLine(); return 0; }
245                 
246                 string outputFile;
247         
248         map<string, string> variables; 
249         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
250                 if (output == "lt") { //does the user want lower triangle phylip formatted file 
251             variables["[outputtag]"] = "phylip";
252                         outputFile = getOutputFileName("phylip", variables);
253                         m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile);
254                         
255                         //output numSeqs to phylip formatted dist file
256                 }else if (output == "column") { //user wants column format
257                         outputFile = getOutputFileName("column", variables);
258                         outputTypes["column"].push_back(outputFile);
259                         
260                         //so we don't accidentally overwrite
261                         if (outputFile == column) { 
262                                 string tempcolumn = column + ".old"; 
263                                 rename(column.c_str(), tempcolumn.c_str());
264                         }
265                         
266                         m->mothurRemove(outputFile);
267                 }else { //assume square
268                         variables["[outputtag]"] = "square";
269                         outputFile = getOutputFileName("phylip", variables);
270                         m->mothurRemove(outputFile);
271                         outputTypes["phylip"].push_back(outputFile);
272                 }
273                 
274
275 #ifdef USE_MPI
276                 
277                 int pid, start, end; 
278                 int tag = 2001;
279                                 
280                 MPI_Status status; 
281                 MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
282                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
283                 
284                 //each process gets where it should start and stop in the file
285                 if (output != "square") {
286                         start = int (sqrt(float(pid)/float(processors)) * numSeqs);
287                         end = int (sqrt(float(pid+1)/float(processors)) * numSeqs);
288                 }else{
289                         start = int ((float(pid)/float(processors)) * numSeqs);
290                         end = int ((float(pid+1)/float(processors)) * numSeqs);
291                 }
292                 
293                 if (output == "column") {
294                         MPI_File outMPI;
295                         int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
296
297                         //char* filename = new char[outputFile.length()];
298                         //memcpy(filename, outputFile.c_str(), outputFile.length());
299                         
300                         char filename[1024];
301                         strcpy(filename, outputFile.c_str());
302                         
303                         MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI);
304                         //delete filename;
305
306                         if (pid == 0) { //you are the root process 
307                                 
308                                 //do your part
309                                 string outputMyPart;
310                                 
311                                 driverMPI(start, end, outMPI, cutoff); 
312                                 
313                                 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI);   return 0; }
314                         
315                                 //wait on chidren
316                                 for(int i = 1; i < processors; i++) { 
317                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);    return 0; }
318                                         
319                                         char buf[5];
320                                         MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
321                                 }
322                         }else { //you are a child process
323                                 //do your part
324                                 driverMPI(start, end, outMPI, cutoff); 
325                                 
326                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   return 0; }
327                         
328                                 char buf[5];
329                                 strcpy(buf, "done"); 
330                                 //tell parent you are done.
331                                 MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
332                         }
333                         
334                         MPI_File_close(&outMPI);
335                         
336                 }else { //lower triangle format
337                         if (pid == 0) { //you are the root process 
338                         
339                                 //do your part
340                                 string outputMyPart;
341                                 unsigned long long mySize;
342                                 
343                                 if (output != "square"){ driverMPI(start, end, outputFile, mySize); }
344                                 else { driverMPI(start, end, outputFile, mySize, output); }
345         
346                                 if (m->control_pressed) {  outputTypes.clear();   return 0; }
347                                 
348                                 int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; //
349                                 MPI_File outMPI;
350                                 MPI_File inMPI;
351
352                                 //char* filename = new char[outputFile.length()];
353                                 //memcpy(filename, outputFile.c_str(), outputFile.length());
354                                 
355                                 char filename[1024];
356                                 strcpy(filename, outputFile.c_str());
357
358                                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
359                                 //delete filename;
360
361                                 //wait on chidren
362                                 for(int b = 1; b < processors; b++) { 
363                                         unsigned long long fileSize;
364                                         
365                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   return 0; }
366                                         
367                                         MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); 
368                                         
369                                         string outTemp = outputFile + toString(b) + ".temp";
370
371                                         char* buf = new char[outTemp.length()];
372                                         memcpy(buf, outTemp.c_str(), outTemp.length());
373                                         
374                                         MPI_File_open(MPI_COMM_SELF, buf, MPI_MODE_DELETE_ON_CLOSE|MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);
375                                         delete buf;
376
377                                         int count = 0;
378                                         while (count < fileSize) { 
379                                                 char buf2[1];
380                                                 MPI_File_read(inMPI, buf2, 1, MPI_CHAR, &status);
381                                                 MPI_File_write(outMPI, buf2, 1, MPI_CHAR, &status);
382                                                 count += 1;
383                                         }
384                                         
385                                         MPI_File_close(&inMPI); //deleted on close
386                                 }
387                                 
388                                 MPI_File_close(&outMPI);
389                         }else { //you are a child process
390                                 //do your part
391                                 unsigned long long size;
392                                 if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); }
393                                 else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); }
394                                 
395                                 if (m->control_pressed) {  return 0; }
396                         
397                                 //tell parent you are done.
398                                 MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD);
399                         }
400                 }
401                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
402 #else           
403                                 
404         //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
405                 //if you don't need to fork anything
406                 if(processors == 1){
407                         if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
408                         else { driver(0, numSeqs, outputFile, "square");  }
409                 }else{ //you have multiple processors
410                         
411                         unsigned long long numDists = 0;
412                         
413                         if (output == "square") {
414                                  numDists = numSeqs * numSeqs;
415                         }else {
416                                 for(int i=0;i<numSeqs;i++){
417                                         for(int j=0;j<i;j++){
418                                                 numDists++;
419                                                 if (numDists > processors) { break; }
420                                         }
421                                 }
422                         }
423                         
424                         if (numDists < processors) { processors = numDists; }
425                         
426                         for (int i = 0; i < processors; i++) {
427                                 distlinePair tempLine;
428                                 lines.push_back(tempLine);
429                                 if (output != "square") {
430                                         lines[i].start = int (sqrt(float(i)/float(processors)) * numSeqs);
431                                         lines[i].end = int (sqrt(float(i+1)/float(processors)) * numSeqs);
432                                 }else{
433                                         lines[i].start = int ((float(i)/float(processors)) * numSeqs);
434                                         lines[i].end = int ((float(i+1)/float(processors)) * numSeqs);
435                                 }
436                                 
437                         }
438                         
439                         createProcesses(outputFile); 
440                 }
441         //#else
442                 //ifstream inFASTA;
443                 //if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
444                 //else { driver(0, numSeqs, outputFile, "square");  }
445         //#endif
446         
447 #endif
448                 if (m->control_pressed) { outputTypes.clear();  m->mothurRemove(outputFile); return 0; }
449                 
450                 #ifdef USE_MPI
451                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
452                                         
453                         if (pid == 0) { //only one process should output to screen
454                 #endif
455                 
456                 //if (output == "square") {  convertMatrix(outputFile); }
457                 
458                 ifstream fileHandle;
459                 fileHandle.open(outputFile.c_str());
460                 if(fileHandle) {
461                         m->gobble(fileHandle);
462                         if (fileHandle.eof()) { m->mothurOut(outputFile + " is blank. This can result if there are no distances below your cutoff.");  m->mothurOutEndLine(); }
463                 }
464                 
465                 //append the old column file to the new one
466                 if ((oldfastafile != "") && (column != ""))  {
467                         //we had to rename the column file so we didnt overwrite above, but we want to keep old name
468                         if (outputFile == column) { 
469                                 string tempcolumn = column + ".old";
470                                 m->appendFiles(tempcolumn, outputFile);
471                                 m->mothurRemove(tempcolumn);
472                         }else{
473                                 m->appendFiles(outputFile, column);
474                                 m->mothurRemove(outputFile);
475                                 outputFile = column;
476                         }
477                         
478                         if (outputDir != "") { 
479                                 string newOutputName = outputDir + m->getSimpleName(outputFile);
480                                 rename(outputFile.c_str(), newOutputName.c_str());
481                                 m->mothurRemove(outputFile);
482                                 outputFile = newOutputName;
483                         }
484                 }
485
486                 
487                 #ifdef USE_MPI
488                         }
489                 #endif
490                 
491                 if (m->control_pressed) { outputTypes.clear();  m->mothurRemove(outputFile); return 0; }
492                 
493                 //set phylip file as new current phylipfile
494                 string current = "";
495                 itTypes = outputTypes.find("phylip");
496                 if (itTypes != outputTypes.end()) {
497                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
498                 }
499                 
500                 //set column file as new current columnfile
501                 itTypes = outputTypes.find("column");
502                 if (itTypes != outputTypes.end()) {
503                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
504                 }
505                 
506                 m->mothurOutEndLine();
507                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
508                 m->mothurOut(outputFile); m->mothurOutEndLine();
509                 m->mothurOutEndLine();
510                 m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
511
512
513                 if (m->isTrue(compress)) {
514                         m->mothurOut("Compressing..."); m->mothurOutEndLine();
515                         m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)"); m->mothurOutEndLine();
516                         system(("gzip -v " + outputFile).c_str());
517                         outputNames.push_back(outputFile + ".gz");
518                 }else { outputNames.push_back(outputFile); }
519
520                 return 0;
521                 
522         }
523         catch(exception& e) {
524                 m->errorOut(e, "DistanceCommand", "execute");
525                 exit(1);
526         }
527 }
528 /**************************************************************************************************/
529 void DistanceCommand::createProcesses(string filename) {
530         try {
531 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
532                 int process = 1;
533                 processIDS.clear();
534                 
535                 //loop through and create all the processes you want
536                 while (process != processors) {
537                         int pid = fork();
538                         
539                         if (pid > 0) {
540                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
541                                 process++;
542                         }else if (pid == 0){
543                                 if (output != "square") {  driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", cutoff); }
544                                 else { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", "square"); }
545                                 exit(0);
546                         }else { 
547                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); 
548                                 perror(" : ");
549                                 for (int i=0;i<processIDS.size();i++) {  int temp = processIDS[i]; kill (temp, SIGINT); }
550                                 exit(0);
551                         }
552                 }
553                 
554                 //parent does its part
555                 if (output != "square") {  driver(lines[0].start, lines[0].end, filename, cutoff); }
556                 else { driver(lines[0].start, lines[0].end, filename, "square"); }
557                 
558                 
559                 //force parent to wait until all the processes are done
560                 for (int i=0;i<processIDS.size();i++) { 
561                         int temp = processIDS[i];
562                         wait(&temp);
563                 }
564 #else
565                 //////////////////////////////////////////////////////////////////////////////////////////////////////
566                 //Windows version shared memory, so be careful when passing variables through the distanceData struct. 
567                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
568                 //that's why the distance calculator was moved inside of the driver to make separate copies.
569                 //////////////////////////////////////////////////////////////////////////////////////////////////////
570                 
571                 vector<distanceData*> pDataArray; //[processors-1];
572                 DWORD   dwThreadIdArray[processors-1];
573                 HANDLE  hThreadArray[processors-1]; 
574                 
575                 //Create processor-1 worker threads.
576                 for( int i=0; i<processors-1; i++ ){
577                         
578                         // Allocate memory for thread data.
579                         distanceData* tempDist = new distanceData(lines[i+1].start, lines[i+1].end, (filename + toString(i) + ".temp"), cutoff, alignDB, Estimators, m, output, numNewFasta, countends);
580                         pDataArray.push_back(tempDist);
581                         processIDS.push_back(i);
582                         
583                         //MyDistThreadFunction is in header. It must be global or static to work with the threads.
584                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
585                         hThreadArray[i] = CreateThread(NULL, 0, MyDistThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
586                 }
587                 
588                 //do your part
589                 if (output != "square") {  driver(lines[0].start, lines[0].end, filename, cutoff); }
590                 else { driver(lines[0].start, lines[0].end, filename, "square"); }
591                 
592                 //Wait until all threads have terminated.
593                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
594                 
595                 //Close all thread handles and free memory allocations.
596                 for(int i=0; i < pDataArray.size(); i++){
597                         CloseHandle(hThreadArray[i]);
598                         delete pDataArray[i];
599                 }
600 #endif
601                 
602                 //append and remove temp files
603                 for (int i=0;i<processIDS.size();i++) { 
604                         m->appendFiles((filename + toString(processIDS[i]) + ".temp"), filename);
605                         m->mothurRemove((filename + toString(processIDS[i]) + ".temp"));
606                 }
607                 
608         }
609         catch(exception& e) {
610                 m->errorOut(e, "DistanceCommand", "createProcesses");
611                 exit(1);
612         }
613 }
614 /**************************************************************************************************/
615 /////// need to fix to work with calcs and sequencedb
616 int DistanceCommand::driver(int startLine, int endLine, string dFileName, float cutoff){
617         try {
618                 ValidCalculators validCalculator;
619                 Dist* distCalculator;
620                 if (m->isTrue(countends) == true) {
621                         for (int i=0; i<Estimators.size(); i++) {
622                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
623                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
624                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
625                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
626                                 }
627                         }
628                 }else {
629                         for (int i=0; i<Estimators.size(); i++) {
630                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
631                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
632                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
633                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
634                                 }
635                         }
636                 }
637                 
638                 int startTime = time(NULL);
639                 
640                 //column file
641                 ofstream outFile(dFileName.c_str(), ios::trunc);
642                 outFile.setf(ios::fixed, ios::showpoint);
643                 outFile << setprecision(4);
644                 
645                 if((output == "lt") && startLine == 0){ outFile << alignDB.getNumSeqs() << endl;        }
646                 
647                 for(int i=startLine;i<endLine;i++){
648                         if(output == "lt")      {       
649                                 string name = alignDB.get(i).getName();
650                                 if (name.length() < 10) { //pad with spaces to make compatible
651                                         while (name.length() < 10) {  name += " ";  }
652                                 }
653                                 outFile << name << '\t';        
654                         }
655                         for(int j=0;j<i;j++){
656                                 
657                                 if (m->control_pressed) { delete distCalculator; outFile.close(); return 0;  }
658                                 
659                                 //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file
660                                 //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop
661                                 if ((i >= numNewFasta) && (j >= numNewFasta)) { break; }
662                                 
663                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
664                                 double dist = distCalculator->getDist();
665                                 
666                                 if(dist <= cutoff){
667                                         if (output == "column") { outFile << alignDB.get(i).getName() << ' ' << alignDB.get(j).getName() << ' ' << dist << endl; }
668                                 }
669                                 if (output == "lt") {  outFile << dist << '\t'; }
670                         }
671                         
672                         if (output == "lt") { outFile << endl; }
673                         
674                         if(i % 100 == 0){
675                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
676                         }
677                         
678                 }
679                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
680                 
681                 outFile.close();
682                 delete distCalculator;
683                 
684                 return 1;
685         }
686         catch(exception& e) {
687                 m->errorOut(e, "DistanceCommand", "driver");
688                 exit(1);
689         }
690 }
691 /**************************************************************************************************/
692 /////// need to fix to work with calcs and sequencedb
693 int DistanceCommand::driver(int startLine, int endLine, string dFileName, string square){
694         try {
695                 ValidCalculators validCalculator;
696                 Dist* distCalculator;
697                 if (m->isTrue(countends) == true) {
698                         for (int i=0; i<Estimators.size(); i++) {
699                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
700                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
701                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
702                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
703                                 }
704                         }
705                 }else {
706                         for (int i=0; i<Estimators.size(); i++) {
707                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
708                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
709                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
710                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
711                                 }
712                         }
713                 }
714                 
715                 int startTime = time(NULL);
716                 
717                 //column file
718                 ofstream outFile(dFileName.c_str(), ios::trunc);
719                 outFile.setf(ios::fixed, ios::showpoint);
720                 outFile << setprecision(4);
721                 
722                 if(startLine == 0){     outFile << alignDB.getNumSeqs() << endl;        }
723                 
724                 for(int i=startLine;i<endLine;i++){
725                                 
726                         string name = alignDB.get(i).getName();
727                         //pad with spaces to make compatible
728                         if (name.length() < 10) { while (name.length() < 10) {  name += " ";  } }
729                                 
730                         outFile << name << '\t';        
731                         
732                         for(int j=0;j<alignDB.getNumSeqs();j++){
733                                 
734                                 if (m->control_pressed) { delete distCalculator; outFile.close(); return 0;  }
735                                 
736                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
737                                 double dist = distCalculator->getDist();
738                                 
739                                 outFile << dist << '\t'; 
740                         }
741                         
742                         outFile << endl; 
743                         
744                         if(i % 100 == 0){
745                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
746                         }
747                         
748                 }
749                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
750                 
751                 outFile.close();
752                 delete distCalculator;
753                 
754                 return 1;
755         }
756         catch(exception& e) {
757                 m->errorOut(e, "DistanceCommand", "driver");
758                 exit(1);
759         }
760 }
761 #ifdef USE_MPI
762 /**************************************************************************************************/
763 /////// need to fix to work with calcs and sequencedb
764 int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, float cutoff){
765         try {
766                 
767                 ValidCalculators validCalculator;
768                 Dist* distCalculator;
769                 if (m->isTrue(countends) == true) {
770                         for (int i=0; i<Estimators.size(); i++) {
771                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
772                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
773                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
774                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
775                                 }
776                         }
777                 }else {
778                         for (int i=0; i<Estimators.size(); i++) {
779                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
780                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
781                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
782                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
783                                 }
784                         }
785                 }
786                 
787                 
788                 MPI_Status status;
789                 int startTime = time(NULL);
790                 
791                 string outputString = "";
792                 
793                 for(int i=startLine;i<endLine;i++){
794         
795                         for(int j=0;j<i;j++){
796                                 
797                                 if (m->control_pressed) {  delete distCalculator; return 0;  }
798                                 
799                                 //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file
800                                 //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop
801                                 if ((i >= numNewFasta) && (j >= numNewFasta)) { break; }
802                                 
803                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
804                                 double dist = distCalculator->getDist();
805                                 
806                                 if(dist <= cutoff){
807                                          outputString += (alignDB.get(i).getName() + ' ' + alignDB.get(j).getName() + ' ' + toString(dist) + '\n'); 
808                                 }
809                         }
810                         
811                         if(i % 100 == 0){
812                                 //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
813                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
814                         }
815                         
816                          
817                         //send results to parent
818                         int length = outputString.length();
819
820                         char* buf = new char[length];
821                         memcpy(buf, outputString.c_str(), length);
822                         
823                         MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
824                         outputString = "";
825                         delete buf;
826                         
827                 }
828                 
829                 //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
830                 cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;        
831                 delete distCalculator;
832                 return 1;
833         }
834         catch(exception& e) {
835                 m->errorOut(e, "DistanceCommand", "driverMPI");
836                 exit(1);
837         }
838 }
839 /**************************************************************************************************/
840 /////// need to fix to work with calcs and sequencedb
841 int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long long& size){
842         try {
843                 ValidCalculators validCalculator;
844                 Dist* distCalculator;
845                 if (m->isTrue(countends) == true) {
846                         for (int i=0; i<Estimators.size(); i++) {
847                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
848                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
849                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
850                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
851                                 }
852                         }
853                 }else {
854                         for (int i=0; i<Estimators.size(); i++) {
855                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
856                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
857                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
858                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
859                                 }
860                         }
861                 }
862                 
863                 
864                 MPI_Status status;
865                 
866                 MPI_File outMPI;
867                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
868
869                 //char* filename = new char[file.length()];
870                 //memcpy(filename, file.c_str(), file.length());
871                 
872                 char filename[1024];
873                 strcpy(filename, file.c_str());
874
875                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
876                 //delete filename;
877
878                 int startTime = time(NULL);
879                 
880                 string outputString = "";
881                 size = 0;
882                 
883                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
884                 
885                 for(int i=startLine;i<endLine;i++){
886                                 
887                         string name = alignDB.get(i).getName();
888                         if (name.length() < 10) { //pad with spaces to make compatible
889                                 while (name.length() < 10) {  name += " ";  }
890                         }
891                         outputString += name + "\t";    
892                         
893                         for(int j=0;j<i;j++){
894                                 
895                                 if (m->control_pressed) { delete distCalculator; return 0;  }
896                                 
897                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
898                                 double dist = distCalculator->getDist();
899                                 
900                                 outputString += toString(dist) + "\t"; 
901                         }
902                         
903                         outputString += "\n"; 
904
905                 
906                         if(i % 100 == 0){
907                                 //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
908                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
909                         }
910                         
911                         
912                         //send results to parent
913                         int length = outputString.length();
914                         char* buf = new char[length];
915                         memcpy(buf, outputString.c_str(), length);
916                         
917                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
918                         size += outputString.length();
919                         outputString = "";
920                         delete buf;
921                 }
922                 
923                 //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
924                 cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;
925                 MPI_File_close(&outMPI);
926                 delete distCalculator;
927                 
928                 return 1;
929         }
930         catch(exception& e) {
931                 m->errorOut(e, "DistanceCommand", "driverMPI");
932                 exit(1);
933         }
934 }
935 /**************************************************************************************************/
936 /////// need to fix to work with calcs and sequencedb
937 int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long long& size, string square){
938         try {
939                 ValidCalculators validCalculator;
940                 Dist* distCalculator;
941                 if (m->isTrue(countends) == true) {
942                         for (int i=0; i<Estimators.size(); i++) {
943                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
944                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
945                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
946                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
947                                 }
948                         }
949                 }else {
950                         for (int i=0; i<Estimators.size(); i++) {
951                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
952                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
953                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
954                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
955                                 }
956                         }
957                 }
958                 
959                 MPI_Status status;
960                 
961                 MPI_File outMPI;
962                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
963
964                 //char* filename = new char[file.length()];
965                 //memcpy(filename, file.c_str(), file.length());
966                 
967                 char filename[1024];
968                 strcpy(filename, file.c_str());
969
970                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
971                 //delete filename;
972
973                 int startTime = time(NULL);
974                 
975                 string outputString = "";
976                 size = 0;
977                 
978                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
979                 
980                 for(int i=startLine;i<endLine;i++){
981                                 
982                         string name = alignDB.get(i).getName();
983                         if (name.length() < 10) { //pad with spaces to make compatible
984                                 while (name.length() < 10) {  name += " ";  }
985                         }
986                         outputString += name + "\t";    
987                         
988                         for(int j=0;j<alignDB.getNumSeqs();j++){
989                                 
990                                 if (m->control_pressed) { delete distCalculator; return 0;  }
991                                 
992                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
993                                 double dist = distCalculator->getDist();
994                                 
995                                 outputString += toString(dist) + "\t"; 
996                         }
997                         
998                         outputString += "\n"; 
999
1000                 
1001                         if(i % 100 == 0){
1002                                 //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
1003                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
1004                         }
1005                         
1006                         
1007                         //send results to parent
1008                         int length = outputString.length();
1009                         char* buf = new char[length];
1010                         memcpy(buf, outputString.c_str(), length);
1011                         
1012                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
1013                         size += outputString.length();
1014                         outputString = "";
1015                         delete buf;
1016                 }
1017                 
1018                 //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
1019                 cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;
1020                 MPI_File_close(&outMPI);
1021                 delete distCalculator;
1022                 return 1;
1023         }
1024         catch(exception& e) {
1025                 m->errorOut(e, "DistanceCommand", "driverMPI");
1026                 exit(1);
1027         }
1028 }
1029 #endif
1030 /**************************************************************************************************
1031 int DistanceCommand::convertMatrix(string outputFile) {
1032         try{
1033
1034                 //sort file by first column so the distances for each row are together
1035                 string outfile = m->getRootName(outputFile) + "sorted.dist.temp";
1036                 
1037                 //use the unix sort 
1038                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1039                         string command = "sort -n " + outputFile + " -o " + outfile;
1040                         system(command.c_str());
1041                 #else //sort using windows sort
1042                         string command = "sort " + outputFile + " /O " + outfile;
1043                         system(command.c_str());
1044                 #endif
1045                 
1046
1047                 //output to new file distance for each row and save positions in file where new row begins
1048                 ifstream in;
1049                 m->openInputFile(outfile, in);
1050                 
1051                 ofstream out;
1052                 m->openOutputFile(outputFile, out);
1053                 
1054                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
1055
1056                 out << alignDB.getNumSeqs() << endl;
1057                 
1058                 //get first currentRow
1059                 string first, currentRow, second;
1060                 float dist;
1061                 map<string, float> rowDists; //take advantage of the fact that maps are already sorted by key 
1062                 map<string, float>::iterator it;
1063                 
1064                 in >> first;
1065                 currentRow = first;
1066                 
1067                 rowDists[first] = 0.00; //distance to yourself is 0.0
1068                 
1069                 in.seekg(0);
1070                 //m->openInputFile(outfile, in);
1071                 
1072                 while(!in.eof()) {
1073                         if (m->control_pressed) { in.close(); m->mothurRemove(outfile); out.close(); return 0; }
1074                         
1075                         in >> first >> second >> dist; m->gobble(in);
1076                                 
1077                         if (first != currentRow) {
1078                                 //print out last row
1079                                 out << currentRow << '\t'; //print name
1080
1081                                 //print dists
1082                                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
1083                                         out << it->second << '\t';
1084                                 }
1085                                 out << endl;
1086                                 
1087                                 //start new row
1088                                 currentRow = first;
1089                                 rowDists.clear();
1090                                 rowDists[first] = 0.00;
1091                                 rowDists[second] = dist;
1092                         }else{
1093                                 rowDists[second] = dist;
1094                         }
1095                 }
1096                 //print out last row
1097                 out << currentRow << '\t'; //print name
1098                                 
1099                 //print dists
1100                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
1101                         out << it->second << '\t';
1102                 }
1103                 out << endl;
1104                 
1105                 in.close();
1106                 out.close();
1107                 
1108                 m->mothurRemove(outfile);
1109                 
1110                 return 1;
1111                 
1112         }
1113         catch(exception& e) {
1114                 m->errorOut(e, "DistanceCommand", "convertMatrix");
1115                 exit(1);
1116         }
1117 }
1118 **************************************************************************************************
1119 int DistanceCommand::convertToLowerTriangle(string outputFile) {
1120         try{
1121
1122                 //sort file by first column so the distances for each row are together
1123                 string outfile = m->getRootName(outputFile) + "sorted.dist.temp";
1124                 
1125                 //use the unix sort 
1126                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1127                         string command = "sort -n " + outputFile + " -o " + outfile;
1128                         system(command.c_str());
1129                 #else //sort using windows sort
1130                         string command = "sort " + outputFile + " /O " + outfile;
1131                         system(command.c_str());
1132                 #endif
1133                 
1134
1135                 //output to new file distance for each row and save positions in file where new row begins
1136                 ifstream in;
1137                 m->openInputFile(outfile, in);
1138                 
1139                 ofstream out;
1140                 m->openOutputFile(outputFile, out);
1141                 
1142                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
1143
1144                 out << alignDB.getNumSeqs() << endl;
1145                 
1146                 //get first currentRow
1147                 string first, currentRow, second;
1148                 float dist;
1149                 int i, j;
1150                 i = 0; j = 0;
1151                 map<string, float> rowDists; //take advantage of the fact that maps are already sorted by key 
1152                 map<string, float>::iterator it;
1153                 
1154                 in >> first;
1155                 currentRow = first;
1156                 
1157                 rowDists[first] = 0.00; //distance to yourself is 0.0
1158                 
1159                 in.seekg(0);
1160                 //m->openInputFile(outfile, in);
1161                 
1162                 while(!in.eof()) {
1163                         if (m->control_pressed) { in.close(); m->mothurRemove(outfile); out.close(); return 0; }
1164                         
1165                         in >> first >> second >> dist; m->gobble(in);
1166                                 
1167                         if (first != currentRow) {
1168                                 //print out last row
1169                                 out << currentRow << '\t'; //print name
1170
1171                                 //print dists
1172                                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
1173                                         if (j >= i) { break; }
1174                                         out << it->second << '\t';
1175                                         j++;
1176                                 }
1177                                 out << endl;
1178                                 
1179                                 //start new row
1180                                 currentRow = first;
1181                                 rowDists.clear();
1182                                 rowDists[first] = 0.00;
1183                                 rowDists[second] = dist;
1184                                 j = 0;
1185                                 i++;
1186                         }else{
1187                                 rowDists[second] = dist;
1188                         }
1189                 }
1190                 //print out last row
1191                 out << currentRow << '\t'; //print name
1192                                 
1193                 //print dists
1194                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
1195                         out << it->second << '\t';
1196                 }
1197                 out << endl;
1198                 
1199                 in.close();
1200                 out.close();
1201                 
1202                 m->mothurRemove(outfile);
1203                 
1204                 return 1;
1205                 
1206         }
1207         catch(exception& e) {
1208                 m->errorOut(e, "DistanceCommand", "convertToLowerTriangle");
1209                 exit(1);
1210         }
1211 }
1212 **************************************************************************************************/
1213 //its okay if the column file does not contain all the names in the fasta file, since some distance may have been above a cutoff,
1214 //but no sequences can be in the column file that are not in oldfasta. also, if a distance is above the cutoff given then remove it.
1215 //also check to make sure the 2 files have the same alignment length.
1216 bool DistanceCommand::sanityCheck() {
1217         try{
1218                 bool good = true;
1219                 
1220                 //make sure the 2 fasta files have the same alignment length
1221                 ifstream in;
1222                 m->openInputFile(fastafile, in);
1223                 int fastaAlignLength = 0;
1224                 if (in) { 
1225                         Sequence tempIn(in);
1226                         fastaAlignLength = tempIn.getAligned().length();
1227                 }
1228                 in.close();
1229                 
1230                 ifstream in2;
1231                 m->openInputFile(oldfastafile, in2);
1232                 int oldfastaAlignLength = 0;
1233                 if (in2) { 
1234                         Sequence tempIn2(in2);
1235                         oldfastaAlignLength = tempIn2.getAligned().length();
1236                 }
1237                 in2.close();
1238                 
1239                 if (fastaAlignLength != oldfastaAlignLength) { m->mothurOut("fasta files do not have the same alignment length."); m->mothurOutEndLine(); return false;  }
1240                 
1241                 //read fasta file and save names as well as adding them to the alignDB
1242                 set<string> namesOldFasta;
1243                 
1244                 ifstream inFasta;
1245                 m->openInputFile(oldfastafile, inFasta);
1246                 
1247                 while (!inFasta.eof()) {
1248                         if (m->control_pressed) {  inFasta.close(); return good;  }
1249                 
1250                         Sequence temp(inFasta);
1251                         
1252                         if (temp.getName() != "") {
1253                                 namesOldFasta.insert(temp.getName());  //save name
1254                                 alignDB.push_back(temp);  //add to DB
1255                         }
1256                         
1257                         m->gobble(inFasta);
1258                 }
1259                 
1260                 inFasta.close();
1261                 
1262                 //read through the column file checking names and removing distances above the cutoff
1263                 ifstream inDist;
1264                 m->openInputFile(column, inDist);
1265                 
1266                 ofstream outDist;
1267                 string outputFile = column + ".temp";
1268                 m->openOutputFile(outputFile, outDist);
1269                 
1270                 string name1, name2;
1271                 float dist;
1272                 while (!inDist.eof()) {
1273                         if (m->control_pressed) {  inDist.close(); outDist.close(); m->mothurRemove(outputFile); return good;  }
1274                 
1275                         inDist >> name1 >> name2 >> dist; m->gobble(inDist);
1276                         
1277                         //both names are in fasta file and distance is below cutoff
1278                         if ((namesOldFasta.count(name1) == 0) || (namesOldFasta.count(name2) == 0)) {  good = false; break;  }
1279                         else{
1280                                 if (dist <= cutoff) {
1281                                         outDist << name1 << '\t' << name2 << '\t' << dist << endl;
1282                                 }
1283                         }
1284                 }
1285                 
1286                 inDist.close();
1287                 outDist.close();
1288                 
1289                 if (good) {
1290                         m->mothurRemove(column);
1291                         rename(outputFile.c_str(), column.c_str());
1292                 }else{
1293                         m->mothurRemove(outputFile); //temp file is bad because file mismatch above
1294                 }
1295                 
1296                 return good;
1297                 
1298         }
1299         catch(exception& e) {
1300                 m->errorOut(e, "DistanceCommand", "sanityCheck");
1301                 exit(1);
1302         }
1303 }
1304 /**************************************************************************************************/
1305
1306
1307
1308