]> git.donarmstrong.com Git - mothur.git/blob - distancecommand.cpp
1.22.0
[mothur.git] / distancecommand.cpp
1 /*
2  *  distancecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/7/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "distancecommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> DistanceCommand::setParameters(){        
14         try {
15                 CommandParameter pcolumn("column", "InputTypes", "", "", "none", "none", "OldFastaColumn",false,false); parameters.push_back(pcolumn);
16                 CommandParameter poldfasta("oldfasta", "InputTypes", "", "", "none", "none", "OldFastaColumn",false,false); parameters.push_back(poldfasta);
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter poutput("output", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(poutput);
19                 CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "",false,false); parameters.push_back(pcalc);
20                 CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pcountends);
21                 CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcompress);
22                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
23                 CommandParameter pcutoff("cutoff", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pcutoff);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "DistanceCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string DistanceCommand::getHelpString(){        
38         try {
39                 string helpString = "";
40                 helpString += "The dist.seqs command reads a file containing sequences and creates a distance file.\n";
41                 helpString += "The dist.seqs command parameters are fasta, oldfasta, column, calc, countends, output, compress, cutoff and processors.  \n";
42                 helpString += "The fasta parameter is required, unless you have a valid current fasta file.\n";
43                 helpString += "The oldfasta and column parameters allow you to append the distances calculated to the column file.\n";
44                 helpString += "The calc parameter allows you to specify the method of calculating the distances.  Your options are: nogaps, onegap or eachgap. The default is onegap.\n";
45                 helpString += "The countends parameter allows you to specify whether to include terminal gaps in distance.  Your options are: T or F. The default is T.\n";
46                 helpString += "The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0.\n";
47                 helpString += "The output parameter allows you to specify format of your distance matrix. Options are column, lt, and square. The default is column.\n";
48                 helpString += "The processors parameter allows you to specify number of processors to use.  The default is 1.\n";
49                 helpString += "The compress parameter allows you to indicate that you want the resulting distance file compressed.  The default is false.\n";
50                 helpString += "The dist.seqs command should be in the following format: \n";
51                 helpString += "dist.seqs(fasta=yourFastaFile, calc=yourCalc, countends=yourEnds, cutoff= yourCutOff, processors=yourProcessors) \n";
52                 helpString += "Example dist.seqs(fasta=amazon.fasta, calc=eachgap, countends=F, cutoff= 2.0, processors=3).\n";
53                 helpString += "Note: No spaces between parameter labels (i.e. calc), '=' and parameters (i.e.yourCalc).\n";
54                 return helpString;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "DistanceCommand", "getHelpString");
58                 exit(1);
59         }
60 }
61 //**********************************************************************************************************************
62 DistanceCommand::DistanceCommand(){     
63         try {
64                 abort = true; calledHelp = true; 
65                 setParameters();
66                 vector<string> tempOutNames;
67                 outputTypes["phylip"] = tempOutNames;
68                 outputTypes["column"] = tempOutNames;
69         }
70         catch(exception& e) {
71                 m->errorOut(e, "DistanceCommand", "DistanceCommand");
72                 exit(1);
73         }
74 }
75 //**********************************************************************************************************************
76 DistanceCommand::DistanceCommand(string option) {
77         try {
78                 abort = false; calledHelp = false;   
79                 Estimators.clear();
80                                 
81                 //allow user to run help
82                 if(option == "help") { help(); abort = true; calledHelp = true; }
83                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
84                 
85                 else {
86                         vector<string> myArray = setParameters();
87                         
88                         OptionParser parser(option);
89                         map<string, string> parameters = parser.getParameters();
90                         
91                         ValidParameters validParameter("dist.seqs");
92                         map<string, string>::iterator it2;
93                 
94                         //check to make sure all parameters are valid for command
95                         for (it2 = parameters.begin(); it2 != parameters.end(); it2++) { 
96                                 if (validParameter.isValidParameter(it2->first, myArray, it2->second) != true) {  abort = true;  }
97                         }
98                         
99                         //initialize outputTypes
100                         vector<string> tempOutNames;
101                         outputTypes["phylip"] = tempOutNames;
102                         outputTypes["column"] = tempOutNames;
103                 
104                         //if the user changes the input directory command factory will send this info to us in the output parameter 
105                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
106                         if (inputDir == "not found"){   inputDir = "";          }
107                         else {
108                                 string path;
109                                 it2 = parameters.find("fasta");
110                                 //user has given a template file
111                                 if(it2 != parameters.end()){ 
112                                         path = m->hasPath(it2->second);
113                                         //if the user has not given a path then, add inputdir. else leave path alone.
114                                         if (path == "") {       parameters["fasta"] = inputDir + it2->second;           }
115                                 }
116                                 
117                                 it2 = parameters.find("oldfasta");
118                                 //user has given a template file
119                                 if(it2 != parameters.end()){ 
120                                         path = m->hasPath(it2->second);
121                                         //if the user has not given a path then, add inputdir. else leave path alone.
122                                         if (path == "") {       parameters["oldfasta"] = inputDir + it2->second;                }
123                                 }
124                                 
125                                 it2 = parameters.find("column");
126                                 //user has given a template file
127                                 if(it2 != parameters.end()){ 
128                                         path = m->hasPath(it2->second);
129                                         //if the user has not given a path then, add inputdir. else leave path alone.
130                                         if (path == "") {       parameters["column"] = inputDir + it2->second;          }
131                                 }
132                         }
133
134                         //check for required parameters
135                         fastafile = validParameter.validFile(parameters, "fasta", true);
136                         if (fastafile == "not found") {                                 
137                                 fastafile = m->getFastaFile(); 
138                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); 
139                                         ifstream inFASTA;
140                                         m->openInputFile(fastafile, inFASTA);
141                                         alignDB = SequenceDB(inFASTA); 
142                                         inFASTA.close();
143                                 }else {         m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
144                         }else if (fastafile == "not open") { abort = true; }    
145                         else{
146                                 ifstream inFASTA;
147                                 m->openInputFile(fastafile, inFASTA);
148                                 alignDB = SequenceDB(inFASTA); 
149                                 inFASTA.close();
150                                 m->setFastaFile(fastafile);
151                         }
152                         
153                         oldfastafile = validParameter.validFile(parameters, "oldfasta", true);
154                         if (oldfastafile == "not found") { oldfastafile = ""; }
155                         else if (oldfastafile == "not open") { abort = true; }  
156                         
157                         column = validParameter.validFile(parameters, "column", true);
158                         if (column == "not found") { column = ""; }
159                         else if (column == "not open") { abort = true; }        
160                         else { m->setColumnFile(column); }
161                         
162                         //if the user changes the output directory command factory will send this info to us in the output parameter 
163                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
164                                 outputDir = ""; 
165                                 outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it       
166                         }
167
168                         //check for optional parameter and set defaults
169                         // ...at some point should added some additional type checking...
170                         calc = validParameter.validFile(parameters, "calc", false);                     
171                         if (calc == "not found") { calc = "onegap";  }
172                         else { 
173                                  if (calc == "default")  {  calc = "onegap";  }
174                         }
175                         m->splitAtDash(calc, Estimators);
176
177                         string temp;
178                         temp = validParameter.validFile(parameters, "countends", false);        if(temp == "not found"){        temp = "T";     }
179                         convert(temp, countends); 
180                         
181                         temp = validParameter.validFile(parameters, "cutoff", false);           if(temp == "not found"){        temp = "1.0"; }
182                         convert(temp, cutoff); 
183                         
184                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
185                         m->setProcessors(temp);
186                         convert(temp, processors);
187                         
188                         temp = validParameter.validFile(parameters, "compress", false);         if(temp == "not found"){  temp = "F"; }
189                         convert(temp, compress);
190
191                         output = validParameter.validFile(parameters, "output", false);         if(output == "not found"){      output = "column"; }
192                         
193                         if (((column != "") && (oldfastafile == "")) || ((column == "") && (oldfastafile != ""))) { m->mothurOut("If you provide column or oldfasta, you must provide both."); m->mothurOutEndLine(); abort=true; }
194                         
195                         if ((column != "") && (oldfastafile != "") && (output != "column")) { m->mothurOut("You have provided column and oldfasta, indicating you want to append distances to your column file. Your output must be in column format to do so."); m->mothurOutEndLine(); abort=true; }
196                         
197                         if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; }
198
199                 }
200                                 
201         }
202         catch(exception& e) {
203                 m->errorOut(e, "DistanceCommand", "DistanceCommand");
204                 exit(1);
205         }
206 }
207 //**********************************************************************************************************************
208
209 int DistanceCommand::execute(){
210         try {
211                 
212                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
213                 
214                 int startTime = time(NULL);
215                 
216                 //save number of new sequence
217                 numNewFasta = alignDB.getNumSeqs();
218                 
219                 //sanity check the oldfasta and column file as well as add oldfasta sequences to alignDB
220                 if ((oldfastafile != "") && (column != ""))  {  if (!(sanityCheck())) { return 0; }  }
221                 
222                 if (m->control_pressed) { return 0; }
223                 
224                 int numSeqs = alignDB.getNumSeqs();
225                 cutoff += 0.005;
226                 
227                 if (!alignDB.sameLength()) {  m->mothurOut("[ERROR]: your sequences are not the same length, aborting."); m->mothurOutEndLine(); return 0; }
228                 
229                 string outputFile;
230                                 
231                 if (output == "lt") { //does the user want lower triangle phylip formatted file 
232                         outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "phylip.dist";
233                         m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile);
234                         
235                         //output numSeqs to phylip formatted dist file
236                 }else if (output == "column") { //user wants column format
237                         outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist";
238                         outputTypes["column"].push_back(outputFile);
239                         
240                         //so we don't accidentally overwrite
241                         if (outputFile == column) { 
242                                 string tempcolumn = column + ".old"; 
243                                 rename(column.c_str(), tempcolumn.c_str());
244                         }
245                         
246                         m->mothurRemove(outputFile);
247                 }else { //assume square
248                         outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "square.dist";
249                         m->mothurRemove(outputFile);
250                         outputTypes["phylip"].push_back(outputFile);
251                 }
252                 
253
254 #ifdef USE_MPI
255                 
256                 int pid, start, end; 
257                 int tag = 2001;
258                                 
259                 MPI_Status status; 
260                 MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
261                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
262                 
263                 //each process gets where it should start and stop in the file
264                 if (output != "square") {
265                         start = int (sqrt(float(pid)/float(processors)) * numSeqs);
266                         end = int (sqrt(float(pid+1)/float(processors)) * numSeqs);
267                 }else{
268                         start = int ((float(pid)/float(processors)) * numSeqs);
269                         end = int ((float(pid+1)/float(processors)) * numSeqs);
270                 }
271                 
272                 if (output == "column") {
273                         MPI_File outMPI;
274                         int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
275
276                         //char* filename = new char[outputFile.length()];
277                         //memcpy(filename, outputFile.c_str(), outputFile.length());
278                         
279                         char filename[1024];
280                         strcpy(filename, outputFile.c_str());
281                         
282                         MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI);
283                         //delete filename;
284
285                         if (pid == 0) { //you are the root process 
286                                 
287                                 //do your part
288                                 string outputMyPart;
289                                 
290                                 driverMPI(start, end, outMPI, cutoff); 
291                                 
292                                 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI);   return 0; }
293                         
294                                 //wait on chidren
295                                 for(int i = 1; i < processors; i++) { 
296                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);    return 0; }
297                                         
298                                         char buf[5];
299                                         MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
300                                 }
301                         }else { //you are a child process
302                                 //do your part
303                                 driverMPI(start, end, outMPI, cutoff); 
304                                 
305                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   return 0; }
306                         
307                                 char buf[5];
308                                 strcpy(buf, "done"); 
309                                 //tell parent you are done.
310                                 MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
311                         }
312                         
313                         MPI_File_close(&outMPI);
314                         
315                 }else { //lower triangle format
316                         if (pid == 0) { //you are the root process 
317                         
318                                 //do your part
319                                 string outputMyPart;
320                                 unsigned long long mySize;
321                                 
322                                 if (output != "square"){ driverMPI(start, end, outputFile, mySize); }
323                                 else { driverMPI(start, end, outputFile, mySize, output); }
324         
325                                 if (m->control_pressed) {  outputTypes.clear();   return 0; }
326                                 
327                                 int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; //
328                                 MPI_File outMPI;
329                                 MPI_File inMPI;
330
331                                 //char* filename = new char[outputFile.length()];
332                                 //memcpy(filename, outputFile.c_str(), outputFile.length());
333                                 
334                                 char filename[1024];
335                                 strcpy(filename, outputFile.c_str());
336
337                                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
338                                 //delete filename;
339
340                                 //wait on chidren
341                                 for(int b = 1; b < processors; b++) { 
342                                         unsigned long long fileSize;
343                                         
344                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   return 0; }
345                                         
346                                         MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); 
347                                         
348                                         string outTemp = outputFile + toString(b) + ".temp";
349
350                                         char* buf = new char[outTemp.length()];
351                                         memcpy(buf, outTemp.c_str(), outTemp.length());
352                                         
353                                         MPI_File_open(MPI_COMM_SELF, buf, MPI_MODE_DELETE_ON_CLOSE|MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);
354                                         delete buf;
355
356                                         int count = 0;
357                                         while (count < fileSize) { 
358                                                 char buf2[1];
359                                                 MPI_File_read(inMPI, buf2, 1, MPI_CHAR, &status);
360                                                 MPI_File_write(outMPI, buf2, 1, MPI_CHAR, &status);
361                                                 count += 1;
362                                         }
363                                         
364                                         MPI_File_close(&inMPI); //deleted on close
365                                 }
366                                 
367                                 MPI_File_close(&outMPI);
368                         }else { //you are a child process
369                                 //do your part
370                                 unsigned long long size;
371                                 if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); }
372                                 else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); }
373                                 
374                                 if (m->control_pressed) {  return 0; }
375                         
376                                 //tell parent you are done.
377                                 MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD);
378                         }
379                 }
380                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
381 #else           
382                                 
383         //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
384                 //if you don't need to fork anything
385                 if(processors == 1){
386                         if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
387                         else { driver(0, numSeqs, outputFile, "square");  }
388                 }else{ //you have multiple processors
389                         
390                         unsigned long long numDists = 0;
391                         
392                         if (output == "square") {
393                                  numDists = numSeqs * numSeqs;
394                         }else {
395                                 for(int i=0;i<numSeqs;i++){
396                                         for(int j=0;j<i;j++){
397                                                 numDists++;
398                                                 if (numDists > processors) { break; }
399                                         }
400                                 }
401                         }
402                         
403                         if (numDists < processors) { processors = numDists; }
404                         
405                         for (int i = 0; i < processors; i++) {
406                                 distlinePair tempLine;
407                                 lines.push_back(tempLine);
408                                 if (output != "square") {
409                                         lines[i].start = int (sqrt(float(i)/float(processors)) * numSeqs);
410                                         lines[i].end = int (sqrt(float(i+1)/float(processors)) * numSeqs);
411                                 }else{
412                                         lines[i].start = int ((float(i)/float(processors)) * numSeqs);
413                                         lines[i].end = int ((float(i+1)/float(processors)) * numSeqs);
414                                 }
415                                 
416                         }
417                         
418                         createProcesses(outputFile); 
419                 }
420         //#else
421                 //ifstream inFASTA;
422                 //if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
423                 //else { driver(0, numSeqs, outputFile, "square");  }
424         //#endif
425         
426 #endif
427                 if (m->control_pressed) { outputTypes.clear();  m->mothurRemove(outputFile); return 0; }
428                 
429                 #ifdef USE_MPI
430                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
431                                         
432                         if (pid == 0) { //only one process should output to screen
433                 #endif
434                 
435                 //if (output == "square") {  convertMatrix(outputFile); }
436                 
437                 ifstream fileHandle;
438                 fileHandle.open(outputFile.c_str());
439                 if(fileHandle) {
440                         m->gobble(fileHandle);
441                         if (fileHandle.eof()) { m->mothurOut(outputFile + " is blank. This can result if there are no distances below your cutoff.");  m->mothurOutEndLine(); }
442                 }
443                 
444                 //append the old column file to the new one
445                 if ((oldfastafile != "") && (column != ""))  {
446                         //we had to rename the column file so we didnt overwrite above, but we want to keep old name
447                         if (outputFile == column) { 
448                                 string tempcolumn = column + ".old";
449                                 m->appendFiles(tempcolumn, outputFile);
450                                 m->mothurRemove(tempcolumn);
451                         }else{
452                                 m->appendFiles(outputFile, column);
453                                 m->mothurRemove(outputFile);
454                                 outputFile = column;
455                         }
456                         
457                         if (outputDir != "") { 
458                                 string newOutputName = outputDir + m->getSimpleName(outputFile);
459                                 rename(outputFile.c_str(), newOutputName.c_str());
460                                 m->mothurRemove(outputFile);
461                                 outputFile = newOutputName;
462                         }
463                 }
464
465                 
466                 #ifdef USE_MPI
467                         }
468                 #endif
469                 
470                 if (m->control_pressed) { outputTypes.clear();  m->mothurRemove(outputFile); return 0; }
471                 
472                 //set phylip file as new current phylipfile
473                 string current = "";
474                 itTypes = outputTypes.find("phylip");
475                 if (itTypes != outputTypes.end()) {
476                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
477                 }
478                 
479                 //set column file as new current columnfile
480                 itTypes = outputTypes.find("column");
481                 if (itTypes != outputTypes.end()) {
482                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
483                 }
484                 
485                 m->mothurOutEndLine();
486                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
487                 m->mothurOut(outputFile); m->mothurOutEndLine();
488                 m->mothurOutEndLine();
489                 m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
490
491
492                 if (m->isTrue(compress)) {
493                         m->mothurOut("Compressing..."); m->mothurOutEndLine();
494                         m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)"); m->mothurOutEndLine();
495                         system(("gzip -v " + outputFile).c_str());
496                         outputNames.push_back(outputFile + ".gz");
497                 }else { outputNames.push_back(outputFile); }
498
499                 return 0;
500                 
501         }
502         catch(exception& e) {
503                 m->errorOut(e, "DistanceCommand", "execute");
504                 exit(1);
505         }
506 }
507 /**************************************************************************************************/
508 void DistanceCommand::createProcesses(string filename) {
509         try {
510 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
511                 int process = 1;
512                 processIDS.clear();
513                 
514                 //loop through and create all the processes you want
515                 while (process != processors) {
516                         int pid = fork();
517                         
518                         if (pid > 0) {
519                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
520                                 process++;
521                         }else if (pid == 0){
522                                 if (output != "square") {  driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", cutoff); }
523                                 else { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", "square"); }
524                                 exit(0);
525                         }else { 
526                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); 
527                                 perror(" : ");
528                                 for (int i=0;i<processIDS.size();i++) {  int temp = processIDS[i]; kill (temp, SIGINT); }
529                                 exit(0);
530                         }
531                 }
532                 
533                 //parent does its part
534                 if (output != "square") {  driver(lines[0].start, lines[0].end, filename, cutoff); }
535                 else { driver(lines[0].start, lines[0].end, filename, "square"); }
536                 
537                 
538                 //force parent to wait until all the processes are done
539                 for (int i=0;i<processIDS.size();i++) { 
540                         int temp = processIDS[i];
541                         wait(&temp);
542                 }
543 #else
544                 //////////////////////////////////////////////////////////////////////////////////////////////////////
545                 //Windows version shared memory, so be careful when passing variables through the distanceData struct. 
546                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
547                 //that's why the distance calculator was moved inside of the driver to make separate copies.
548                 //////////////////////////////////////////////////////////////////////////////////////////////////////
549                 
550                 vector<distanceData*> pDataArray; //[processors-1];
551                 DWORD   dwThreadIdArray[processors-1];
552                 HANDLE  hThreadArray[processors-1]; 
553                 
554                 //Create processor-1 worker threads.
555                 for( int i=0; i<processors-1; i++ ){
556                         
557                         // Allocate memory for thread data.
558                         distanceData* tempDist = new distanceData(lines[i+1].start, lines[i+1].end, (filename + toString(i) + ".temp"), cutoff, alignDB, Estimators, m, output, numNewFasta, countends);
559                         pDataArray.push_back(tempDist);
560                         processIDS.push_back(i);
561                         
562                         //MyDistThreadFunction is in header. It must be global or static to work with the threads.
563                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
564                         hThreadArray[i] = CreateThread(NULL, 0, MyDistThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
565                 }
566                 
567                 //do your part
568                 if (output != "square") {  driver(lines[0].start, lines[0].end, filename, cutoff); }
569                 else { driver(lines[0].start, lines[0].end, filename, "square"); }
570                 
571                 //Wait until all threads have terminated.
572                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
573                 
574                 //Close all thread handles and free memory allocations.
575                 for(int i=0; i < pDataArray.size(); i++){
576                         CloseHandle(hThreadArray[i]);
577                         delete pDataArray[i];
578                 }
579 #endif
580                 
581                 //append and remove temp files
582                 for (int i=0;i<processIDS.size();i++) { 
583                         m->appendFiles((filename + toString(processIDS[i]) + ".temp"), filename);
584                         m->mothurRemove((filename + toString(processIDS[i]) + ".temp"));
585                 }
586                 
587         }
588         catch(exception& e) {
589                 m->errorOut(e, "DistanceCommand", "createProcesses");
590                 exit(1);
591         }
592 }
593 /**************************************************************************************************/
594 /////// need to fix to work with calcs and sequencedb
595 int DistanceCommand::driver(int startLine, int endLine, string dFileName, float cutoff){
596         try {
597                 ValidCalculators validCalculator;
598                 Dist* distCalculator;
599                 if (m->isTrue(countends) == true) {
600                         for (int i=0; i<Estimators.size(); i++) {
601                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
602                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
603                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
604                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
605                                 }
606                         }
607                 }else {
608                         for (int i=0; i<Estimators.size(); i++) {
609                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
610                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
611                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
612                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
613                                 }
614                         }
615                 }
616                 
617                 int startTime = time(NULL);
618                 
619                 //column file
620                 ofstream outFile(dFileName.c_str(), ios::trunc);
621                 outFile.setf(ios::fixed, ios::showpoint);
622                 outFile << setprecision(4);
623                 
624                 if((output == "lt") && startLine == 0){ outFile << alignDB.getNumSeqs() << endl;        }
625                 
626                 for(int i=startLine;i<endLine;i++){
627                         if(output == "lt")      {       
628                                 string name = alignDB.get(i).getName();
629                                 if (name.length() < 10) { //pad with spaces to make compatible
630                                         while (name.length() < 10) {  name += " ";  }
631                                 }
632                                 outFile << name << '\t';        
633                         }
634                         for(int j=0;j<i;j++){
635                                 
636                                 if (m->control_pressed) { delete distCalculator; outFile.close(); return 0;  }
637                                 
638                                 //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file
639                                 //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop
640                                 if ((i >= numNewFasta) && (j >= numNewFasta)) { break; }
641                                 
642                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
643                                 double dist = distCalculator->getDist();
644                                 
645                                 if(dist <= cutoff){
646                                         if (output == "column") { outFile << alignDB.get(i).getName() << ' ' << alignDB.get(j).getName() << ' ' << dist << endl; }
647                                 }
648                                 if (output == "lt") {  outFile << dist << '\t'; }
649                         }
650                         
651                         if (output == "lt") { outFile << endl; }
652                         
653                         if(i % 100 == 0){
654                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
655                         }
656                         
657                 }
658                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
659                 
660                 outFile.close();
661                 delete distCalculator;
662                 
663                 return 1;
664         }
665         catch(exception& e) {
666                 m->errorOut(e, "DistanceCommand", "driver");
667                 exit(1);
668         }
669 }
670 /**************************************************************************************************/
671 /////// need to fix to work with calcs and sequencedb
672 int DistanceCommand::driver(int startLine, int endLine, string dFileName, string square){
673         try {
674                 ValidCalculators validCalculator;
675                 Dist* distCalculator;
676                 if (m->isTrue(countends) == true) {
677                         for (int i=0; i<Estimators.size(); i++) {
678                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
679                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
680                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
681                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
682                                 }
683                         }
684                 }else {
685                         for (int i=0; i<Estimators.size(); i++) {
686                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
687                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
688                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
689                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
690                                 }
691                         }
692                 }
693                 
694                 int startTime = time(NULL);
695                 
696                 //column file
697                 ofstream outFile(dFileName.c_str(), ios::trunc);
698                 outFile.setf(ios::fixed, ios::showpoint);
699                 outFile << setprecision(4);
700                 
701                 if(startLine == 0){     outFile << alignDB.getNumSeqs() << endl;        }
702                 
703                 for(int i=startLine;i<endLine;i++){
704                                 
705                         string name = alignDB.get(i).getName();
706                         //pad with spaces to make compatible
707                         if (name.length() < 10) { while (name.length() < 10) {  name += " ";  } }
708                                 
709                         outFile << name << '\t';        
710                         
711                         for(int j=0;j<alignDB.getNumSeqs();j++){
712                                 
713                                 if (m->control_pressed) { delete distCalculator; outFile.close(); return 0;  }
714                                 
715                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
716                                 double dist = distCalculator->getDist();
717                                 
718                                 outFile << dist << '\t'; 
719                         }
720                         
721                         outFile << endl; 
722                         
723                         if(i % 100 == 0){
724                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
725                         }
726                         
727                 }
728                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
729                 
730                 outFile.close();
731                 delete distCalculator;
732                 
733                 return 1;
734         }
735         catch(exception& e) {
736                 m->errorOut(e, "DistanceCommand", "driver");
737                 exit(1);
738         }
739 }
740 #ifdef USE_MPI
741 /**************************************************************************************************/
742 /////// need to fix to work with calcs and sequencedb
743 int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, float cutoff){
744         try {
745                 
746                 ValidCalculators validCalculator;
747                 Dist* distCalculator;
748                 if (m->isTrue(countends) == true) {
749                         for (int i=0; i<Estimators.size(); i++) {
750                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
751                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
752                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
753                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
754                                 }
755                         }
756                 }else {
757                         for (int i=0; i<Estimators.size(); i++) {
758                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
759                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
760                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
761                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
762                                 }
763                         }
764                 }
765                 
766                 
767                 MPI_Status status;
768                 int startTime = time(NULL);
769                 
770                 string outputString = "";
771                 
772                 for(int i=startLine;i<endLine;i++){
773         
774                         for(int j=0;j<i;j++){
775                                 
776                                 if (m->control_pressed) {  delete distCalculator; return 0;  }
777                                 
778                                 //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file
779                                 //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop
780                                 if ((i >= numNewFasta) && (j >= numNewFasta)) { break; }
781                                 
782                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
783                                 double dist = distCalculator->getDist();
784                                 
785                                 if(dist <= cutoff){
786                                          outputString += (alignDB.get(i).getName() + ' ' + alignDB.get(j).getName() + ' ' + toString(dist) + '\n'); 
787                                 }
788                         }
789                         
790                         if(i % 100 == 0){
791                                 //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
792                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
793                         }
794                         
795                          
796                         //send results to parent
797                         int length = outputString.length();
798
799                         char* buf = new char[length];
800                         memcpy(buf, outputString.c_str(), length);
801                         
802                         MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
803                         outputString = "";
804                         delete buf;
805                         
806                 }
807                 
808                 //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
809                 cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;        
810                 delete distCalculator;
811                 return 1;
812         }
813         catch(exception& e) {
814                 m->errorOut(e, "DistanceCommand", "driverMPI");
815                 exit(1);
816         }
817 }
818 /**************************************************************************************************/
819 /////// need to fix to work with calcs and sequencedb
820 int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long long& size){
821         try {
822                 ValidCalculators validCalculator;
823                 Dist* distCalculator;
824                 if (m->isTrue(countends) == true) {
825                         for (int i=0; i<Estimators.size(); i++) {
826                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
827                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
828                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
829                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
830                                 }
831                         }
832                 }else {
833                         for (int i=0; i<Estimators.size(); i++) {
834                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
835                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
836                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
837                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
838                                 }
839                         }
840                 }
841                 
842                 
843                 MPI_Status status;
844                 
845                 MPI_File outMPI;
846                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
847
848                 //char* filename = new char[file.length()];
849                 //memcpy(filename, file.c_str(), file.length());
850                 
851                 char filename[1024];
852                 strcpy(filename, file.c_str());
853
854                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
855                 //delete filename;
856
857                 int startTime = time(NULL);
858                 
859                 string outputString = "";
860                 size = 0;
861                 
862                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
863                 
864                 for(int i=startLine;i<endLine;i++){
865                                 
866                         string name = alignDB.get(i).getName();
867                         if (name.length() < 10) { //pad with spaces to make compatible
868                                 while (name.length() < 10) {  name += " ";  }
869                         }
870                         outputString += name + "\t";    
871                         
872                         for(int j=0;j<i;j++){
873                                 
874                                 if (m->control_pressed) { delete distCalculator; return 0;  }
875                                 
876                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
877                                 double dist = distCalculator->getDist();
878                                 
879                                 outputString += toString(dist) + "\t"; 
880                         }
881                         
882                         outputString += "\n"; 
883
884                 
885                         if(i % 100 == 0){
886                                 //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
887                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
888                         }
889                         
890                         
891                         //send results to parent
892                         int length = outputString.length();
893                         char* buf = new char[length];
894                         memcpy(buf, outputString.c_str(), length);
895                         
896                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
897                         size += outputString.length();
898                         outputString = "";
899                         delete buf;
900                 }
901                 
902                 //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
903                 cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;
904                 MPI_File_close(&outMPI);
905                 delete distCalculator;
906                 
907                 return 1;
908         }
909         catch(exception& e) {
910                 m->errorOut(e, "DistanceCommand", "driverMPI");
911                 exit(1);
912         }
913 }
914 /**************************************************************************************************/
915 /////// need to fix to work with calcs and sequencedb
916 int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long long& size, string square){
917         try {
918                 ValidCalculators validCalculator;
919                 Dist* distCalculator;
920                 if (m->isTrue(countends) == true) {
921                         for (int i=0; i<Estimators.size(); i++) {
922                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
923                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
924                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
925                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
926                                 }
927                         }
928                 }else {
929                         for (int i=0; i<Estimators.size(); i++) {
930                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
931                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
932                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
933                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
934                                 }
935                         }
936                 }
937                 
938                 MPI_Status status;
939                 
940                 MPI_File outMPI;
941                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
942
943                 //char* filename = new char[file.length()];
944                 //memcpy(filename, file.c_str(), file.length());
945                 
946                 char filename[1024];
947                 strcpy(filename, file.c_str());
948
949                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
950                 //delete filename;
951
952                 int startTime = time(NULL);
953                 
954                 string outputString = "";
955                 size = 0;
956                 
957                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
958                 
959                 for(int i=startLine;i<endLine;i++){
960                                 
961                         string name = alignDB.get(i).getName();
962                         if (name.length() < 10) { //pad with spaces to make compatible
963                                 while (name.length() < 10) {  name += " ";  }
964                         }
965                         outputString += name + "\t";    
966                         
967                         for(int j=0;j<alignDB.getNumSeqs();j++){
968                                 
969                                 if (m->control_pressed) { delete distCalculator; return 0;  }
970                                 
971                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
972                                 double dist = distCalculator->getDist();
973                                 
974                                 outputString += toString(dist) + "\t"; 
975                         }
976                         
977                         outputString += "\n"; 
978
979                 
980                         if(i % 100 == 0){
981                                 //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
982                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
983                         }
984                         
985                         
986                         //send results to parent
987                         int length = outputString.length();
988                         char* buf = new char[length];
989                         memcpy(buf, outputString.c_str(), length);
990                         
991                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
992                         size += outputString.length();
993                         outputString = "";
994                         delete buf;
995                 }
996                 
997                 //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
998                 cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;
999                 MPI_File_close(&outMPI);
1000                 delete distCalculator;
1001                 return 1;
1002         }
1003         catch(exception& e) {
1004                 m->errorOut(e, "DistanceCommand", "driverMPI");
1005                 exit(1);
1006         }
1007 }
1008 #endif
1009 /**************************************************************************************************
1010 int DistanceCommand::convertMatrix(string outputFile) {
1011         try{
1012
1013                 //sort file by first column so the distances for each row are together
1014                 string outfile = m->getRootName(outputFile) + "sorted.dist.temp";
1015                 
1016                 //use the unix sort 
1017                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1018                         string command = "sort -n " + outputFile + " -o " + outfile;
1019                         system(command.c_str());
1020                 #else //sort using windows sort
1021                         string command = "sort " + outputFile + " /O " + outfile;
1022                         system(command.c_str());
1023                 #endif
1024                 
1025
1026                 //output to new file distance for each row and save positions in file where new row begins
1027                 ifstream in;
1028                 m->openInputFile(outfile, in);
1029                 
1030                 ofstream out;
1031                 m->openOutputFile(outputFile, out);
1032                 
1033                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
1034
1035                 out << alignDB.getNumSeqs() << endl;
1036                 
1037                 //get first currentRow
1038                 string first, currentRow, second;
1039                 float dist;
1040                 map<string, float> rowDists; //take advantage of the fact that maps are already sorted by key 
1041                 map<string, float>::iterator it;
1042                 
1043                 in >> first;
1044                 currentRow = first;
1045                 
1046                 rowDists[first] = 0.00; //distance to yourself is 0.0
1047                 
1048                 in.seekg(0);
1049                 //m->openInputFile(outfile, in);
1050                 
1051                 while(!in.eof()) {
1052                         if (m->control_pressed) { in.close(); m->mothurRemove(outfile); out.close(); return 0; }
1053                         
1054                         in >> first >> second >> dist; m->gobble(in);
1055                                 
1056                         if (first != currentRow) {
1057                                 //print out last row
1058                                 out << currentRow << '\t'; //print name
1059
1060                                 //print dists
1061                                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
1062                                         out << it->second << '\t';
1063                                 }
1064                                 out << endl;
1065                                 
1066                                 //start new row
1067                                 currentRow = first;
1068                                 rowDists.clear();
1069                                 rowDists[first] = 0.00;
1070                                 rowDists[second] = dist;
1071                         }else{
1072                                 rowDists[second] = dist;
1073                         }
1074                 }
1075                 //print out last row
1076                 out << currentRow << '\t'; //print name
1077                                 
1078                 //print dists
1079                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
1080                         out << it->second << '\t';
1081                 }
1082                 out << endl;
1083                 
1084                 in.close();
1085                 out.close();
1086                 
1087                 m->mothurRemove(outfile);
1088                 
1089                 return 1;
1090                 
1091         }
1092         catch(exception& e) {
1093                 m->errorOut(e, "DistanceCommand", "convertMatrix");
1094                 exit(1);
1095         }
1096 }
1097 /**************************************************************************************************
1098 int DistanceCommand::convertToLowerTriangle(string outputFile) {
1099         try{
1100
1101                 //sort file by first column so the distances for each row are together
1102                 string outfile = m->getRootName(outputFile) + "sorted.dist.temp";
1103                 
1104                 //use the unix sort 
1105                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1106                         string command = "sort -n " + outputFile + " -o " + outfile;
1107                         system(command.c_str());
1108                 #else //sort using windows sort
1109                         string command = "sort " + outputFile + " /O " + outfile;
1110                         system(command.c_str());
1111                 #endif
1112                 
1113
1114                 //output to new file distance for each row and save positions in file where new row begins
1115                 ifstream in;
1116                 m->openInputFile(outfile, in);
1117                 
1118                 ofstream out;
1119                 m->openOutputFile(outputFile, out);
1120                 
1121                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
1122
1123                 out << alignDB.getNumSeqs() << endl;
1124                 
1125                 //get first currentRow
1126                 string first, currentRow, second;
1127                 float dist;
1128                 int i, j;
1129                 i = 0; j = 0;
1130                 map<string, float> rowDists; //take advantage of the fact that maps are already sorted by key 
1131                 map<string, float>::iterator it;
1132                 
1133                 in >> first;
1134                 currentRow = first;
1135                 
1136                 rowDists[first] = 0.00; //distance to yourself is 0.0
1137                 
1138                 in.seekg(0);
1139                 //m->openInputFile(outfile, in);
1140                 
1141                 while(!in.eof()) {
1142                         if (m->control_pressed) { in.close(); m->mothurRemove(outfile); out.close(); return 0; }
1143                         
1144                         in >> first >> second >> dist; m->gobble(in);
1145                                 
1146                         if (first != currentRow) {
1147                                 //print out last row
1148                                 out << currentRow << '\t'; //print name
1149
1150                                 //print dists
1151                                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
1152                                         if (j >= i) { break; }
1153                                         out << it->second << '\t';
1154                                         j++;
1155                                 }
1156                                 out << endl;
1157                                 
1158                                 //start new row
1159                                 currentRow = first;
1160                                 rowDists.clear();
1161                                 rowDists[first] = 0.00;
1162                                 rowDists[second] = dist;
1163                                 j = 0;
1164                                 i++;
1165                         }else{
1166                                 rowDists[second] = dist;
1167                         }
1168                 }
1169                 //print out last row
1170                 out << currentRow << '\t'; //print name
1171                                 
1172                 //print dists
1173                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
1174                         out << it->second << '\t';
1175                 }
1176                 out << endl;
1177                 
1178                 in.close();
1179                 out.close();
1180                 
1181                 m->mothurRemove(outfile);
1182                 
1183                 return 1;
1184                 
1185         }
1186         catch(exception& e) {
1187                 m->errorOut(e, "DistanceCommand", "convertToLowerTriangle");
1188                 exit(1);
1189         }
1190 }
1191 /**************************************************************************************************/
1192 //its okay if the column file does not contain all the names in the fasta file, since some distance may have been above a cutoff,
1193 //but no sequences can be in the column file that are not in oldfasta. also, if a distance is above the cutoff given then remove it.
1194 //also check to make sure the 2 files have the same alignment length.
1195 bool DistanceCommand::sanityCheck() {
1196         try{
1197                 bool good = true;
1198                 
1199                 //make sure the 2 fasta files have the same alignment length
1200                 ifstream in;
1201                 m->openInputFile(fastafile, in);
1202                 int fastaAlignLength = 0;
1203                 if (in) { 
1204                         Sequence tempIn(in);
1205                         fastaAlignLength = tempIn.getAligned().length();
1206                 }
1207                 in.close();
1208                 
1209                 ifstream in2;
1210                 m->openInputFile(oldfastafile, in2);
1211                 int oldfastaAlignLength = 0;
1212                 if (in2) { 
1213                         Sequence tempIn2(in2);
1214                         oldfastaAlignLength = tempIn2.getAligned().length();
1215                 }
1216                 in2.close();
1217                 
1218                 if (fastaAlignLength != oldfastaAlignLength) { m->mothurOut("fasta files do not have the same alignment length."); m->mothurOutEndLine(); return false;  }
1219                 
1220                 //read fasta file and save names as well as adding them to the alignDB
1221                 set<string> namesOldFasta;
1222                 
1223                 ifstream inFasta;
1224                 m->openInputFile(oldfastafile, inFasta);
1225                 
1226                 while (!inFasta.eof()) {
1227                         if (m->control_pressed) {  inFasta.close(); return good;  }
1228                 
1229                         Sequence temp(inFasta);
1230                         
1231                         if (temp.getName() != "") {
1232                                 namesOldFasta.insert(temp.getName());  //save name
1233                                 alignDB.push_back(temp);  //add to DB
1234                         }
1235                         
1236                         m->gobble(inFasta);
1237                 }
1238                 
1239                 inFasta.close();
1240                 
1241                 //read through the column file checking names and removing distances above the cutoff
1242                 ifstream inDist;
1243                 m->openInputFile(column, inDist);
1244                 
1245                 ofstream outDist;
1246                 string outputFile = column + ".temp";
1247                 m->openOutputFile(outputFile, outDist);
1248                 
1249                 string name1, name2;
1250                 float dist;
1251                 while (!inDist.eof()) {
1252                         if (m->control_pressed) {  inDist.close(); outDist.close(); m->mothurRemove(outputFile); return good;  }
1253                 
1254                         inDist >> name1 >> name2 >> dist; m->gobble(inDist);
1255                         
1256                         //both names are in fasta file and distance is below cutoff
1257                         if ((namesOldFasta.count(name1) == 0) || (namesOldFasta.count(name2) == 0)) {  good = false; break;  }
1258                         else{
1259                                 if (dist <= cutoff) {
1260                                         outDist << name1 << '\t' << name2 << '\t' << dist << endl;
1261                                 }
1262                         }
1263                 }
1264                 
1265                 inDist.close();
1266                 outDist.close();
1267                 
1268                 if (good) {
1269                         m->mothurRemove(column);
1270                         rename(outputFile.c_str(), column.c_str());
1271                 }else{
1272                         m->mothurRemove(outputFile); //temp file is bad because file mismatch above
1273                 }
1274                 
1275                 return good;
1276                 
1277         }
1278         catch(exception& e) {
1279                 m->errorOut(e, "DistanceCommand", "sanityCheck");
1280                 exit(1);
1281         }
1282 }
1283 /**************************************************************************************************/
1284
1285
1286
1287