]> git.donarmstrong.com Git - mothur.git/blob - distancecommand.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / distancecommand.cpp
1 /*
2  *  distancecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/7/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "distancecommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> DistanceCommand::setParameters(){        
14         try {
15                 CommandParameter pcolumn("column", "InputTypes", "", "", "none", "none", "OldFastaColumn","column",false,false); parameters.push_back(pcolumn);
16                 CommandParameter poldfasta("oldfasta", "InputTypes", "", "", "none", "none", "OldFastaColumn","",false,false); parameters.push_back(poldfasta);
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","phylip-column",false,true, true); parameters.push_back(pfasta);
18                 CommandParameter poutput("output", "Multiple", "column-lt-square-phylip", "column", "", "", "","phylip-column",false,false, true); parameters.push_back(poutput);
19                 CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "","",false,false); parameters.push_back(pcalc);
20                 CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pcountends);
21                 CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcompress);
22                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false, true); parameters.push_back(pprocessors);
23                 CommandParameter pcutoff("cutoff", "Number", "", "1.0", "", "", "","",false,false, true); parameters.push_back(pcutoff);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "DistanceCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string DistanceCommand::getHelpString(){        
38         try {
39                 string helpString = "";
40                 helpString += "The dist.seqs command reads a file containing sequences and creates a distance file.\n";
41                 helpString += "The dist.seqs command parameters are fasta, oldfasta, column, calc, countends, output, compress, cutoff and processors.  \n";
42                 helpString += "The fasta parameter is required, unless you have a valid current fasta file.\n";
43                 helpString += "The oldfasta and column parameters allow you to append the distances calculated to the column file.\n";
44                 helpString += "The calc parameter allows you to specify the method of calculating the distances.  Your options are: nogaps, onegap or eachgap. The default is onegap.\n";
45                 helpString += "The countends parameter allows you to specify whether to include terminal gaps in distance.  Your options are: T or F. The default is T.\n";
46                 helpString += "The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0.\n";
47                 helpString += "The output parameter allows you to specify format of your distance matrix. Options are column, lt, and square. The default is column.\n";
48                 helpString += "The processors parameter allows you to specify number of processors to use.  The default is 1.\n";
49                 helpString += "The compress parameter allows you to indicate that you want the resulting distance file compressed.  The default is false.\n";
50                 helpString += "The dist.seqs command should be in the following format: \n";
51                 helpString += "dist.seqs(fasta=yourFastaFile, calc=yourCalc, countends=yourEnds, cutoff= yourCutOff, processors=yourProcessors) \n";
52                 helpString += "Example dist.seqs(fasta=amazon.fasta, calc=eachgap, countends=F, cutoff= 2.0, processors=3).\n";
53                 helpString += "Note: No spaces between parameter labels (i.e. calc), '=' and parameters (i.e.yourCalc).\n";
54                 return helpString;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "DistanceCommand", "getHelpString");
58                 exit(1);
59         }
60 }
61 //**********************************************************************************************************************
62 string DistanceCommand::getOutputPattern(string type) {
63     try {
64         string pattern = "";
65         
66         if (type == "phylip") {  pattern = "[filename],[outputtag],dist"; } 
67         else if (type == "column") { pattern = "[filename],dist"; }
68         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
69         
70         return pattern;
71     }
72     catch(exception& e) {
73         m->errorOut(e, "DistanceCommand", "getOutputPattern");
74         exit(1);
75     }
76 }
77 //**********************************************************************************************************************
78 DistanceCommand::DistanceCommand(){     
79         try {
80                 abort = true; calledHelp = true; 
81                 setParameters();
82                 vector<string> tempOutNames;
83                 outputTypes["phylip"] = tempOutNames;
84                 outputTypes["column"] = tempOutNames;
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "DistanceCommand", "DistanceCommand");
88                 exit(1);
89         }
90 }
91 //**********************************************************************************************************************
92 DistanceCommand::DistanceCommand(string option) {
93         try {
94                 abort = false; calledHelp = false;   
95                 Estimators.clear();
96                                 
97                 //allow user to run help
98                 if(option == "help") { help(); abort = true; calledHelp = true; }
99                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
100                 
101                 else {
102                         vector<string> myArray = setParameters();
103                         
104                         OptionParser parser(option);
105                         map<string, string> parameters = parser.getParameters();
106                         
107                         ValidParameters validParameter("dist.seqs");
108                         map<string, string>::iterator it2;
109                 
110                         //check to make sure all parameters are valid for command
111                         for (it2 = parameters.begin(); it2 != parameters.end(); it2++) { 
112                                 if (validParameter.isValidParameter(it2->first, myArray, it2->second) != true) {  abort = true;  }
113                         }
114                         
115                         //initialize outputTypes
116                         vector<string> tempOutNames;
117                         outputTypes["phylip"] = tempOutNames;
118                         outputTypes["column"] = tempOutNames;
119                 
120                         //if the user changes the input directory command factory will send this info to us in the output parameter 
121                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
122                         if (inputDir == "not found"){   inputDir = "";          }
123                         else {
124                                 string path;
125                                 it2 = parameters.find("fasta");
126                                 //user has given a template file
127                                 if(it2 != parameters.end()){ 
128                                         path = m->hasPath(it2->second);
129                                         //if the user has not given a path then, add inputdir. else leave path alone.
130                                         if (path == "") {       parameters["fasta"] = inputDir + it2->second;           }
131                                 }
132                                 
133                                 it2 = parameters.find("oldfasta");
134                                 //user has given a template file
135                                 if(it2 != parameters.end()){ 
136                                         path = m->hasPath(it2->second);
137                                         //if the user has not given a path then, add inputdir. else leave path alone.
138                                         if (path == "") {       parameters["oldfasta"] = inputDir + it2->second;                }
139                                 }
140                                 
141                                 it2 = parameters.find("column");
142                                 //user has given a template file
143                                 if(it2 != parameters.end()){ 
144                                         path = m->hasPath(it2->second);
145                                         //if the user has not given a path then, add inputdir. else leave path alone.
146                                         if (path == "") {       parameters["column"] = inputDir + it2->second;          }
147                                 }
148                         }
149
150                         //check for required parameters
151                         fastafile = validParameter.validFile(parameters, "fasta", true);
152                         if (fastafile == "not found") {                                 
153                                 fastafile = m->getFastaFile(); 
154                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); 
155                                         ifstream inFASTA;
156                                         m->openInputFile(fastafile, inFASTA);
157                                         alignDB = SequenceDB(inFASTA); 
158                                         inFASTA.close();
159                                 }else {         m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
160                         }else if (fastafile == "not open") { abort = true; }    
161                         else{
162                                 ifstream inFASTA;
163                                 m->openInputFile(fastafile, inFASTA);
164                                 alignDB = SequenceDB(inFASTA); 
165                                 inFASTA.close();
166                                 m->setFastaFile(fastafile);
167                         }
168                         
169                         oldfastafile = validParameter.validFile(parameters, "oldfasta", true);
170                         if (oldfastafile == "not found") { oldfastafile = ""; }
171                         else if (oldfastafile == "not open") { abort = true; }  
172                         
173                         column = validParameter.validFile(parameters, "column", true);
174                         if (column == "not found") { column = ""; }
175                         else if (column == "not open") { abort = true; }        
176                         else { m->setColumnFile(column); }
177                         
178                         //if the user changes the output directory command factory will send this info to us in the output parameter 
179                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
180                                 outputDir = ""; 
181                                 outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it       
182                         }
183
184                         //check for optional parameter and set defaults
185                         // ...at some point should added some additional type checking...
186                         calc = validParameter.validFile(parameters, "calc", false);                     
187                         if (calc == "not found") { calc = "onegap";  }
188                         else { 
189                                  if (calc == "default")  {  calc = "onegap";  }
190                         }
191                         m->splitAtDash(calc, Estimators);
192
193                         string temp;
194                         temp = validParameter.validFile(parameters, "countends", false);        if(temp == "not found"){        temp = "T";     }
195                         convert(temp, countends); 
196                         
197                         temp = validParameter.validFile(parameters, "cutoff", false);           if(temp == "not found"){        temp = "1.0"; }
198                         m->mothurConvert(temp, cutoff); 
199                         
200                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
201                         m->setProcessors(temp);
202                         m->mothurConvert(temp, processors);
203                         
204                         temp = validParameter.validFile(parameters, "compress", false);         if(temp == "not found"){  temp = "F"; }
205                         convert(temp, compress);
206
207                         output = validParameter.validFile(parameters, "output", false);         if(output == "not found"){      output = "column"; }
208             if (output == "phylip") { output = "lt";  }
209                         
210                         if (((column != "") && (oldfastafile == "")) || ((column == "") && (oldfastafile != ""))) { m->mothurOut("If you provide column or oldfasta, you must provide both."); m->mothurOutEndLine(); abort=true; }
211                         
212                         if ((column != "") && (oldfastafile != "") && (output != "column")) { m->mothurOut("You have provided column and oldfasta, indicating you want to append distances to your column file. Your output must be in column format to do so."); m->mothurOutEndLine(); abort=true; }
213                         
214                         if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; }
215
216                 }
217                                 
218         }
219         catch(exception& e) {
220                 m->errorOut(e, "DistanceCommand", "DistanceCommand");
221                 exit(1);
222         }
223 }
224 //**********************************************************************************************************************
225
226 int DistanceCommand::execute(){
227         try {
228                 
229                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
230                 
231                 int startTime = time(NULL);
232                 
233                 //save number of new sequence
234                 numNewFasta = alignDB.getNumSeqs();
235                 
236                 //sanity check the oldfasta and column file as well as add oldfasta sequences to alignDB
237                 if ((oldfastafile != "") && (column != ""))  {  if (!(sanityCheck())) { return 0; }  }
238                 
239                 if (m->control_pressed) { return 0; }
240                 
241                 int numSeqs = alignDB.getNumSeqs();
242                 cutoff += 0.005;
243                 
244                 if (!alignDB.sameLength()) {  m->mothurOut("[ERROR]: your sequences are not the same length, aborting."); m->mothurOutEndLine(); return 0; }
245                 
246                 string outputFile;
247         
248         map<string, string> variables; 
249         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
250                 if (output == "lt") { //does the user want lower triangle phylip formatted file 
251             variables["[outputtag]"] = "phylip";
252                         outputFile = getOutputFileName("phylip", variables);
253                         m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile);
254                         
255                         //output numSeqs to phylip formatted dist file
256                 }else if (output == "column") { //user wants column format
257                         outputFile = getOutputFileName("column", variables);
258                         outputTypes["column"].push_back(outputFile);
259                         
260                         //so we don't accidentally overwrite
261                         if (outputFile == column) { 
262                                 string tempcolumn = column + ".old"; 
263                                 rename(column.c_str(), tempcolumn.c_str());
264                         }
265                         
266                         m->mothurRemove(outputFile);
267                 }else { //assume square
268                         variables["[outputtag]"] = "square";
269                         outputFile = getOutputFileName("phylip", variables);
270                         m->mothurRemove(outputFile);
271                         outputTypes["phylip"].push_back(outputFile);
272                 }
273                 
274
275 #ifdef USE_MPI
276                 
277                 int pid, start, end; 
278                 int tag = 2001;
279                                 
280                 MPI_Status status; 
281                 MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
282                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
283                 
284                 //each process gets where it should start and stop in the file
285                 if (output != "square") {
286                         start = int (sqrt(float(pid)/float(processors)) * numSeqs);
287                         end = int (sqrt(float(pid+1)/float(processors)) * numSeqs);
288                 }else{
289                         start = int ((float(pid)/float(processors)) * numSeqs);
290                         end = int ((float(pid+1)/float(processors)) * numSeqs);
291                 }
292                 
293                 if (output == "column") {
294                         MPI_File outMPI;
295                         int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
296
297                         //char* filename = new char[outputFile.length()];
298                         //memcpy(filename, outputFile.c_str(), outputFile.length());
299                         
300                         char filename[1024];
301                         strcpy(filename, outputFile.c_str());
302                         
303                         MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI);
304                         //delete filename;
305
306                         if (pid == 0) { //you are the root process 
307                                 
308                                 //do your part
309                                 string outputMyPart;
310                                 
311                                 driverMPI(start, end, outMPI, cutoff); 
312                                 
313                                 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI);   return 0; }
314                         
315                                 //wait on chidren
316                                 for(int i = 1; i < processors; i++) { 
317                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);    return 0; }
318                                         
319                                         char buf[5];
320                                         MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
321                                 }
322                         }else { //you are a child process
323                                 //do your part
324                                 driverMPI(start, end, outMPI, cutoff); 
325                                 
326                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   return 0; }
327                         
328                                 char buf[5];
329                                 strcpy(buf, "done"); 
330                                 //tell parent you are done.
331                                 MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
332                         }
333                         
334                         MPI_File_close(&outMPI);
335                         
336                 }else { //lower triangle format
337                         if (pid == 0) { //you are the root process 
338                         
339                                 //do your part
340                                 string outputMyPart;
341                                 unsigned long long mySize;
342                                 
343                                 if (output != "square"){ driverMPI(start, end, outputFile, mySize); }
344                                 else { driverMPI(start, end, outputFile, mySize, output); }
345         
346                                 if (m->control_pressed) {  outputTypes.clear();   return 0; }
347                                 
348                                 int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; //
349                                 MPI_File outMPI;
350                                 MPI_File inMPI;
351
352                                 //char* filename = new char[outputFile.length()];
353                                 //memcpy(filename, outputFile.c_str(), outputFile.length());
354                                 
355                                 char filename[1024];
356                                 strcpy(filename, outputFile.c_str());
357
358                                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
359                                 //delete filename;
360
361                                 //wait on chidren
362                                 for(int b = 1; b < processors; b++) { 
363                                         unsigned long long fileSize;
364                                         
365                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   return 0; }
366                                         
367                                         MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); 
368                                         
369                                         string outTemp = outputFile + toString(b) + ".temp";
370
371                                         char* buf = new char[outTemp.length()];
372                                         memcpy(buf, outTemp.c_str(), outTemp.length());
373                                         
374                                         MPI_File_open(MPI_COMM_SELF, buf, MPI_MODE_DELETE_ON_CLOSE|MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);
375                                         delete buf;
376
377                                         int count = 0;
378                                         while (count < fileSize) { 
379                                                 char buf2[1];
380                                                 MPI_File_read(inMPI, buf2, 1, MPI_CHAR, &status);
381                                                 MPI_File_write(outMPI, buf2, 1, MPI_CHAR, &status);
382                                                 count += 1;
383                                         }
384                                         
385                                         MPI_File_close(&inMPI); //deleted on close
386                                 }
387                                 
388                                 MPI_File_close(&outMPI);
389                         }else { //you are a child process
390                                 //do your part
391                                 unsigned long long size;
392                                 if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); }
393                                 else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); }
394                                 
395                                 if (m->control_pressed) {  return 0; }
396                         
397                                 //tell parent you are done.
398                                 MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD);
399                         }
400                 }
401                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
402 #else           
403                                 
404         //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
405                 //if you don't need to fork anything
406                 if(processors == 1){
407                         if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
408                         else { driver(0, numSeqs, outputFile, "square");  }
409                 }else{ //you have multiple processors
410                         
411                         unsigned long long numDists = 0;
412                         
413                         if (output == "square") {
414                                  numDists = numSeqs * numSeqs;
415                         }else {
416                                 for(int i=0;i<numSeqs;i++){
417                                         for(int j=0;j<i;j++){
418                                                 numDists++;
419                                                 if (numDists > processors) { break; }
420                                         }
421                                 }
422                         }
423                         
424                         if (numDists < processors) { processors = numDists; }
425                         
426                         for (int i = 0; i < processors; i++) {
427                                 distlinePair tempLine;
428                                 lines.push_back(tempLine);
429                                 if (output != "square") {
430                                         lines[i].start = int (sqrt(float(i)/float(processors)) * numSeqs);
431                                         lines[i].end = int (sqrt(float(i+1)/float(processors)) * numSeqs);
432                                 }else{
433                                         lines[i].start = int ((float(i)/float(processors)) * numSeqs);
434                                         lines[i].end = int ((float(i+1)/float(processors)) * numSeqs);
435                                 }
436                                 
437                         }
438                         
439                         createProcesses(outputFile); 
440                 }
441         //#else
442                 //ifstream inFASTA;
443                 //if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
444                 //else { driver(0, numSeqs, outputFile, "square");  }
445         //#endif
446         
447 #endif
448                 if (m->control_pressed) { outputTypes.clear();  m->mothurRemove(outputFile); return 0; }
449                 
450                 #ifdef USE_MPI
451                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
452                                         
453                         if (pid == 0) { //only one process should output to screen
454                 #endif
455                 
456                 //if (output == "square") {  convertMatrix(outputFile); }
457                 
458                 ifstream fileHandle;
459                 fileHandle.open(outputFile.c_str());
460                 if(fileHandle) {
461                         m->gobble(fileHandle);
462                         if (fileHandle.eof()) { m->mothurOut(outputFile + " is blank. This can result if there are no distances below your cutoff.");  m->mothurOutEndLine(); }
463                 }
464                 
465                 //append the old column file to the new one
466                 if ((oldfastafile != "") && (column != ""))  {
467                         //we had to rename the column file so we didnt overwrite above, but we want to keep old name
468                         if (outputFile == column) { 
469                                 string tempcolumn = column + ".old";
470                                 m->appendFiles(tempcolumn, outputFile);
471                                 m->mothurRemove(tempcolumn);
472                         }else{
473                                 m->appendFiles(outputFile, column);
474                                 m->mothurRemove(outputFile);
475                                 outputFile = column;
476                         }
477                         
478                         if (outputDir != "") { 
479                                 string newOutputName = outputDir + m->getSimpleName(outputFile);
480                                 rename(outputFile.c_str(), newOutputName.c_str());
481                                 m->mothurRemove(outputFile);
482                                 outputFile = newOutputName;
483                         }
484                 }
485
486                 
487                 #ifdef USE_MPI
488                         }
489                 #endif
490                 
491                 if (m->control_pressed) { outputTypes.clear();  m->mothurRemove(outputFile); return 0; }
492                 
493                 //set phylip file as new current phylipfile
494                 string current = "";
495                 itTypes = outputTypes.find("phylip");
496                 if (itTypes != outputTypes.end()) {
497                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
498                 }
499                 
500                 //set column file as new current columnfile
501                 itTypes = outputTypes.find("column");
502                 if (itTypes != outputTypes.end()) {
503                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
504                 }
505                 
506                 m->mothurOutEndLine();
507                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
508                 m->mothurOut(outputFile); m->mothurOutEndLine();
509                 m->mothurOutEndLine();
510                 m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
511
512
513                 if (m->isTrue(compress)) {
514                         m->mothurOut("Compressing..."); m->mothurOutEndLine();
515                         m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)"); m->mothurOutEndLine();
516                         system(("gzip -v " + outputFile).c_str());
517                         outputNames.push_back(outputFile + ".gz");
518                 }else { outputNames.push_back(outputFile); }
519
520                 return 0;
521                 
522         }
523         catch(exception& e) {
524                 m->errorOut(e, "DistanceCommand", "execute");
525                 exit(1);
526         }
527 }
528 /**************************************************************************************************/
529 void DistanceCommand::createProcesses(string filename) {
530         try {
531 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
532                 int process = 1;
533                 processIDS.clear();
534                 
535                 //loop through and create all the processes you want
536                 while (process != processors) {
537                         int pid = fork();
538                         
539                         if (pid > 0) {
540                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
541                                 process++;
542                         }else if (pid == 0){
543                                 if (output != "square") {  driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", cutoff); }
544                                 else { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", "square"); }
545                                 exit(0);
546                         }else { 
547                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); 
548                                 perror(" : ");
549                                 for (int i=0;i<processIDS.size();i++) {  int temp = processIDS[i]; kill (temp, SIGINT); }
550                                 exit(0);
551                         }
552                 }
553                 
554                 //parent does its part
555                 if (output != "square") {  driver(lines[0].start, lines[0].end, filename, cutoff); }
556                 else { driver(lines[0].start, lines[0].end, filename, "square"); }
557                 
558                 
559                 //force parent to wait until all the processes are done
560                 for (int i=0;i<processIDS.size();i++) { 
561                         int temp = processIDS[i];
562                         wait(&temp);
563                 }
564 #else
565                 //////////////////////////////////////////////////////////////////////////////////////////////////////
566                 //Windows version shared memory, so be careful when passing variables through the distanceData struct. 
567                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
568                 //that's why the distance calculator was moved inside of the driver to make separate copies.
569                 //////////////////////////////////////////////////////////////////////////////////////////////////////
570                 
571                 vector<distanceData*> pDataArray; //[processors-1];
572                 DWORD   dwThreadIdArray[processors-1];
573                 HANDLE  hThreadArray[processors-1]; 
574                 
575                 //Create processor-1 worker threads.
576                 for( int i=0; i<processors-1; i++ ){
577                         
578                         // Allocate memory for thread data.
579                         distanceData* tempDist = new distanceData(lines[i+1].start, lines[i+1].end, (filename + toString(i) + ".temp"), cutoff, alignDB, Estimators, m, output, numNewFasta, countends);
580                         pDataArray.push_back(tempDist);
581                         processIDS.push_back(i);
582                         
583                         //MyDistThreadFunction is in header. It must be global or static to work with the threads.
584                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
585                         hThreadArray[i] = CreateThread(NULL, 0, MyDistThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
586                 }
587                 
588                 //do your part
589                 if (output != "square") {  driver(lines[0].start, lines[0].end, filename, cutoff); }
590                 else { driver(lines[0].start, lines[0].end, filename, "square"); }
591                 
592                 //Wait until all threads have terminated.
593                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
594                 
595                 //Close all thread handles and free memory allocations.
596                 for(int i=0; i < pDataArray.size(); i++){
597             if (pDataArray[i]->count != (pDataArray[i]->endLine-pDataArray[i]->startLine)) {
598                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->endLine-pDataArray[i]->startLine) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
599             }
600                         CloseHandle(hThreadArray[i]);
601                         delete pDataArray[i];
602                 }
603 #endif
604                 
605                 //append and remove temp files
606                 for (int i=0;i<processIDS.size();i++) { 
607                         m->appendFiles((filename + toString(processIDS[i]) + ".temp"), filename);
608                         m->mothurRemove((filename + toString(processIDS[i]) + ".temp"));
609                 }
610                 
611         }
612         catch(exception& e) {
613                 m->errorOut(e, "DistanceCommand", "createProcesses");
614                 exit(1);
615         }
616 }
617 /**************************************************************************************************/
618 /////// need to fix to work with calcs and sequencedb
619 int DistanceCommand::driver(int startLine, int endLine, string dFileName, float cutoff){
620         try {
621                 ValidCalculators validCalculator;
622                 Dist* distCalculator;
623                 if (m->isTrue(countends) == true) {
624                         for (int i=0; i<Estimators.size(); i++) {
625                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
626                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
627                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
628                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
629                                 }
630                         }
631                 }else {
632                         for (int i=0; i<Estimators.size(); i++) {
633                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
634                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
635                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
636                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
637                                 }
638                         }
639                 }
640                 
641                 int startTime = time(NULL);
642                 
643                 //column file
644                 ofstream outFile(dFileName.c_str(), ios::trunc);
645                 outFile.setf(ios::fixed, ios::showpoint);
646                 outFile << setprecision(4);
647                 
648                 if((output == "lt") && startLine == 0){ outFile << alignDB.getNumSeqs() << endl;        }
649                 
650                 for(int i=startLine;i<endLine;i++){
651                         if(output == "lt")      {       
652                                 string name = alignDB.get(i).getName();
653                                 if (name.length() < 10) { //pad with spaces to make compatible
654                                         while (name.length() < 10) {  name += " ";  }
655                                 }
656                                 outFile << name << '\t';        
657                         }
658                         for(int j=0;j<i;j++){
659                                 
660                                 if (m->control_pressed) { delete distCalculator; outFile.close(); return 0;  }
661                                 
662                                 //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file
663                                 //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop
664                                 if ((i >= numNewFasta) && (j >= numNewFasta)) { break; }
665                                 
666                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
667                                 double dist = distCalculator->getDist();
668                                 
669                                 if(dist <= cutoff){
670                                         if (output == "column") { outFile << alignDB.get(i).getName() << ' ' << alignDB.get(j).getName() << ' ' << dist << endl; }
671                                 }
672                                 if (output == "lt") {  outFile << dist << '\t'; }
673                         }
674                         
675                         if (output == "lt") { outFile << endl; }
676                         
677                         if(i % 100 == 0){
678                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
679                         }
680                         
681                 }
682                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
683                 
684                 outFile.close();
685                 delete distCalculator;
686                 
687                 return 1;
688         }
689         catch(exception& e) {
690                 m->errorOut(e, "DistanceCommand", "driver");
691                 exit(1);
692         }
693 }
694 /**************************************************************************************************/
695 /////// need to fix to work with calcs and sequencedb
696 int DistanceCommand::driver(int startLine, int endLine, string dFileName, string square){
697         try {
698                 ValidCalculators validCalculator;
699                 Dist* distCalculator;
700                 if (m->isTrue(countends) == true) {
701                         for (int i=0; i<Estimators.size(); i++) {
702                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
703                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
704                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
705                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
706                                 }
707                         }
708                 }else {
709                         for (int i=0; i<Estimators.size(); i++) {
710                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
711                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
712                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
713                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
714                                 }
715                         }
716                 }
717                 
718                 int startTime = time(NULL);
719                 
720                 //column file
721                 ofstream outFile(dFileName.c_str(), ios::trunc);
722                 outFile.setf(ios::fixed, ios::showpoint);
723                 outFile << setprecision(4);
724                 
725                 if(startLine == 0){     outFile << alignDB.getNumSeqs() << endl;        }
726                 
727                 for(int i=startLine;i<endLine;i++){
728                                 
729                         string name = alignDB.get(i).getName();
730                         //pad with spaces to make compatible
731                         if (name.length() < 10) { while (name.length() < 10) {  name += " ";  } }
732                                 
733                         outFile << name << '\t';        
734                         
735                         for(int j=0;j<alignDB.getNumSeqs();j++){
736                                 
737                                 if (m->control_pressed) { delete distCalculator; outFile.close(); return 0;  }
738                                 
739                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
740                                 double dist = distCalculator->getDist();
741                                 
742                                 outFile << dist << '\t'; 
743                         }
744                         
745                         outFile << endl; 
746                         
747                         if(i % 100 == 0){
748                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
749                         }
750                         
751                 }
752                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
753                 
754                 outFile.close();
755                 delete distCalculator;
756                 
757                 return 1;
758         }
759         catch(exception& e) {
760                 m->errorOut(e, "DistanceCommand", "driver");
761                 exit(1);
762         }
763 }
764 #ifdef USE_MPI
765 /**************************************************************************************************/
766 /////// need to fix to work with calcs and sequencedb
767 int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, float cutoff){
768         try {
769                 
770                 ValidCalculators validCalculator;
771                 Dist* distCalculator;
772                 if (m->isTrue(countends) == true) {
773                         for (int i=0; i<Estimators.size(); i++) {
774                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
775                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
776                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
777                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
778                                 }
779                         }
780                 }else {
781                         for (int i=0; i<Estimators.size(); i++) {
782                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
783                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
784                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
785                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
786                                 }
787                         }
788                 }
789                 
790                 
791                 MPI_Status status;
792                 int startTime = time(NULL);
793                 
794                 string outputString = "";
795                 
796                 for(int i=startLine;i<endLine;i++){
797         
798                         for(int j=0;j<i;j++){
799                                 
800                                 if (m->control_pressed) {  delete distCalculator; return 0;  }
801                                 
802                                 //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file
803                                 //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop
804                                 if ((i >= numNewFasta) && (j >= numNewFasta)) { break; }
805                                 
806                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
807                                 double dist = distCalculator->getDist();
808                                 
809                                 if(dist <= cutoff){
810                                          outputString += (alignDB.get(i).getName() + ' ' + alignDB.get(j).getName() + ' ' + toString(dist) + '\n'); 
811                                 }
812                         }
813                         
814                         if(i % 100 == 0){
815                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
816                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
817                         }
818                         
819                          
820                         //send results to parent
821                         int length = outputString.length();
822
823                         char* buf = new char[length];
824                         memcpy(buf, outputString.c_str(), length);
825                         
826                         MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
827                         outputString = "";
828                         delete buf;
829                         
830                 }
831                 
832                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
833                 cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;        
834                 delete distCalculator;
835                 return 1;
836         }
837         catch(exception& e) {
838                 m->errorOut(e, "DistanceCommand", "driverMPI");
839                 exit(1);
840         }
841 }
842 /**************************************************************************************************/
843 /////// need to fix to work with calcs and sequencedb
844 int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long long& size){
845         try {
846                 ValidCalculators validCalculator;
847                 Dist* distCalculator;
848                 if (m->isTrue(countends) == true) {
849                         for (int i=0; i<Estimators.size(); i++) {
850                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
851                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
852                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
853                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
854                                 }
855                         }
856                 }else {
857                         for (int i=0; i<Estimators.size(); i++) {
858                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
859                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
860                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
861                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
862                                 }
863                         }
864                 }
865                 
866                 
867                 MPI_Status status;
868                 
869                 MPI_File outMPI;
870                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
871
872                 //char* filename = new char[file.length()];
873                 //memcpy(filename, file.c_str(), file.length());
874                 
875                 char filename[1024];
876                 strcpy(filename, file.c_str());
877
878                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
879                 //delete filename;
880
881                 int startTime = time(NULL);
882                 
883                 string outputString = "";
884                 size = 0;
885                 
886                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
887                 
888                 for(int i=startLine;i<endLine;i++){
889                                 
890                         string name = alignDB.get(i).getName();
891                         if (name.length() < 10) { //pad with spaces to make compatible
892                                 while (name.length() < 10) {  name += " ";  }
893                         }
894                         outputString += name + "\t";    
895                         
896                         for(int j=0;j<i;j++){
897                                 
898                                 if (m->control_pressed) { delete distCalculator; return 0;  }
899                                 
900                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
901                                 double dist = distCalculator->getDist();
902                                 
903                                 outputString += toString(dist) + "\t"; 
904                         }
905                         
906                         outputString += "\n"; 
907
908                 
909                         if(i % 100 == 0){
910                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
911                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
912                         }
913                         
914                         
915                         //send results to parent
916                         int length = outputString.length();
917                         char* buf = new char[length];
918                         memcpy(buf, outputString.c_str(), length);
919                         
920                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
921                         size += outputString.length();
922                         outputString = "";
923                         delete buf;
924                 }
925                 
926                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
927                 cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;
928                 MPI_File_close(&outMPI);
929                 delete distCalculator;
930                 
931                 return 1;
932         }
933         catch(exception& e) {
934                 m->errorOut(e, "DistanceCommand", "driverMPI");
935                 exit(1);
936         }
937 }
938 /**************************************************************************************************/
939 /////// need to fix to work with calcs and sequencedb
940 int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long long& size, string square){
941         try {
942                 ValidCalculators validCalculator;
943                 Dist* distCalculator;
944                 if (m->isTrue(countends) == true) {
945                         for (int i=0; i<Estimators.size(); i++) {
946                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
947                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
948                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
949                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
950                                 }
951                         }
952                 }else {
953                         for (int i=0; i<Estimators.size(); i++) {
954                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
955                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
956                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
957                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
958                                 }
959                         }
960                 }
961                 
962                 MPI_Status status;
963                 
964                 MPI_File outMPI;
965                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
966
967                 //char* filename = new char[file.length()];
968                 //memcpy(filename, file.c_str(), file.length());
969                 
970                 char filename[1024];
971                 strcpy(filename, file.c_str());
972
973                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
974                 //delete filename;
975
976                 int startTime = time(NULL);
977                 
978                 string outputString = "";
979                 size = 0;
980                 
981                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
982                 
983                 for(int i=startLine;i<endLine;i++){
984                                 
985                         string name = alignDB.get(i).getName();
986                         if (name.length() < 10) { //pad with spaces to make compatible
987                                 while (name.length() < 10) {  name += " ";  }
988                         }
989                         outputString += name + "\t";    
990                         
991                         for(int j=0;j<alignDB.getNumSeqs();j++){
992                                 
993                                 if (m->control_pressed) { delete distCalculator; return 0;  }
994                                 
995                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
996                                 double dist = distCalculator->getDist();
997                                 
998                                 outputString += toString(dist) + "\t"; 
999                         }
1000                         
1001                         outputString += "\n"; 
1002
1003                 
1004                         if(i % 100 == 0){
1005                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
1006                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
1007                         }
1008                         
1009                         
1010                         //send results to parent
1011                         int length = outputString.length();
1012                         char* buf = new char[length];
1013                         memcpy(buf, outputString.c_str(), length);
1014                         
1015                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
1016                         size += outputString.length();
1017                         outputString = "";
1018                         delete buf;
1019                 }
1020                 
1021                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
1022                 cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;
1023                 MPI_File_close(&outMPI);
1024                 delete distCalculator;
1025                 return 1;
1026         }
1027         catch(exception& e) {
1028                 m->errorOut(e, "DistanceCommand", "driverMPI");
1029                 exit(1);
1030         }
1031 }
1032 #endif
1033 /**************************************************************************************************
1034 int DistanceCommand::convertMatrix(string outputFile) {
1035         try{
1036
1037                 //sort file by first column so the distances for each row are together
1038                 string outfile = m->getRootName(outputFile) + "sorted.dist.temp";
1039                 
1040                 //use the unix sort 
1041                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1042                         string command = "sort -n " + outputFile + " -o " + outfile;
1043                         system(command.c_str());
1044                 #else //sort using windows sort
1045                         string command = "sort " + outputFile + " /O " + outfile;
1046                         system(command.c_str());
1047                 #endif
1048                 
1049
1050                 //output to new file distance for each row and save positions in file where new row begins
1051                 ifstream in;
1052                 m->openInputFile(outfile, in);
1053                 
1054                 ofstream out;
1055                 m->openOutputFile(outputFile, out);
1056                 
1057                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
1058
1059                 out << alignDB.getNumSeqs() << endl;
1060                 
1061                 //get first currentRow
1062                 string first, currentRow, second;
1063                 float dist;
1064                 map<string, float> rowDists; //take advantage of the fact that maps are already sorted by key 
1065                 map<string, float>::iterator it;
1066                 
1067                 in >> first;
1068                 currentRow = first;
1069                 
1070                 rowDists[first] = 0.00; //distance to yourself is 0.0
1071                 
1072                 in.seekg(0);
1073                 //m->openInputFile(outfile, in);
1074                 
1075                 while(!in.eof()) {
1076                         if (m->control_pressed) { in.close(); m->mothurRemove(outfile); out.close(); return 0; }
1077                         
1078                         in >> first >> second >> dist; m->gobble(in);
1079                                 
1080                         if (first != currentRow) {
1081                                 //print out last row
1082                                 out << currentRow << '\t'; //print name
1083
1084                                 //print dists
1085                                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
1086                                         out << it->second << '\t';
1087                                 }
1088                                 out << endl;
1089                                 
1090                                 //start new row
1091                                 currentRow = first;
1092                                 rowDists.clear();
1093                                 rowDists[first] = 0.00;
1094                                 rowDists[second] = dist;
1095                         }else{
1096                                 rowDists[second] = dist;
1097                         }
1098                 }
1099                 //print out last row
1100                 out << currentRow << '\t'; //print name
1101                                 
1102                 //print dists
1103                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
1104                         out << it->second << '\t';
1105                 }
1106                 out << endl;
1107                 
1108                 in.close();
1109                 out.close();
1110                 
1111                 m->mothurRemove(outfile);
1112                 
1113                 return 1;
1114                 
1115         }
1116         catch(exception& e) {
1117                 m->errorOut(e, "DistanceCommand", "convertMatrix");
1118                 exit(1);
1119         }
1120 }
1121 **************************************************************************************************
1122 int DistanceCommand::convertToLowerTriangle(string outputFile) {
1123         try{
1124
1125                 //sort file by first column so the distances for each row are together
1126                 string outfile = m->getRootName(outputFile) + "sorted.dist.temp";
1127                 
1128                 //use the unix sort 
1129                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1130                         string command = "sort -n " + outputFile + " -o " + outfile;
1131                         system(command.c_str());
1132                 #else //sort using windows sort
1133                         string command = "sort " + outputFile + " /O " + outfile;
1134                         system(command.c_str());
1135                 #endif
1136                 
1137
1138                 //output to new file distance for each row and save positions in file where new row begins
1139                 ifstream in;
1140                 m->openInputFile(outfile, in);
1141                 
1142                 ofstream out;
1143                 m->openOutputFile(outputFile, out);
1144                 
1145                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
1146
1147                 out << alignDB.getNumSeqs() << endl;
1148                 
1149                 //get first currentRow
1150                 string first, currentRow, second;
1151                 float dist;
1152                 int i, j;
1153                 i = 0; j = 0;
1154                 map<string, float> rowDists; //take advantage of the fact that maps are already sorted by key 
1155                 map<string, float>::iterator it;
1156                 
1157                 in >> first;
1158                 currentRow = first;
1159                 
1160                 rowDists[first] = 0.00; //distance to yourself is 0.0
1161                 
1162                 in.seekg(0);
1163                 //m->openInputFile(outfile, in);
1164                 
1165                 while(!in.eof()) {
1166                         if (m->control_pressed) { in.close(); m->mothurRemove(outfile); out.close(); return 0; }
1167                         
1168                         in >> first >> second >> dist; m->gobble(in);
1169                                 
1170                         if (first != currentRow) {
1171                                 //print out last row
1172                                 out << currentRow << '\t'; //print name
1173
1174                                 //print dists
1175                                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
1176                                         if (j >= i) { break; }
1177                                         out << it->second << '\t';
1178                                         j++;
1179                                 }
1180                                 out << endl;
1181                                 
1182                                 //start new row
1183                                 currentRow = first;
1184                                 rowDists.clear();
1185                                 rowDists[first] = 0.00;
1186                                 rowDists[second] = dist;
1187                                 j = 0;
1188                                 i++;
1189                         }else{
1190                                 rowDists[second] = dist;
1191                         }
1192                 }
1193                 //print out last row
1194                 out << currentRow << '\t'; //print name
1195                                 
1196                 //print dists
1197                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
1198                         out << it->second << '\t';
1199                 }
1200                 out << endl;
1201                 
1202                 in.close();
1203                 out.close();
1204                 
1205                 m->mothurRemove(outfile);
1206                 
1207                 return 1;
1208                 
1209         }
1210         catch(exception& e) {
1211                 m->errorOut(e, "DistanceCommand", "convertToLowerTriangle");
1212                 exit(1);
1213         }
1214 }
1215 **************************************************************************************************/
1216 //its okay if the column file does not contain all the names in the fasta file, since some distance may have been above a cutoff,
1217 //but no sequences can be in the column file that are not in oldfasta. also, if a distance is above the cutoff given then remove it.
1218 //also check to make sure the 2 files have the same alignment length.
1219 bool DistanceCommand::sanityCheck() {
1220         try{
1221                 bool good = true;
1222                 
1223                 //make sure the 2 fasta files have the same alignment length
1224                 ifstream in;
1225                 m->openInputFile(fastafile, in);
1226                 int fastaAlignLength = 0;
1227                 if (in) { 
1228                         Sequence tempIn(in);
1229                         fastaAlignLength = tempIn.getAligned().length();
1230                 }
1231                 in.close();
1232                 
1233                 ifstream in2;
1234                 m->openInputFile(oldfastafile, in2);
1235                 int oldfastaAlignLength = 0;
1236                 if (in2) { 
1237                         Sequence tempIn2(in2);
1238                         oldfastaAlignLength = tempIn2.getAligned().length();
1239                 }
1240                 in2.close();
1241                 
1242                 if (fastaAlignLength != oldfastaAlignLength) { m->mothurOut("fasta files do not have the same alignment length."); m->mothurOutEndLine(); return false;  }
1243                 
1244                 //read fasta file and save names as well as adding them to the alignDB
1245                 set<string> namesOldFasta;
1246                 
1247                 ifstream inFasta;
1248                 m->openInputFile(oldfastafile, inFasta);
1249                 
1250                 while (!inFasta.eof()) {
1251                         if (m->control_pressed) {  inFasta.close(); return good;  }
1252                 
1253                         Sequence temp(inFasta);
1254                         
1255                         if (temp.getName() != "") {
1256                                 namesOldFasta.insert(temp.getName());  //save name
1257                                 alignDB.push_back(temp);  //add to DB
1258                         }
1259                         
1260                         m->gobble(inFasta);
1261                 }
1262                 
1263                 inFasta.close();
1264                 
1265                 //read through the column file checking names and removing distances above the cutoff
1266                 ifstream inDist;
1267                 m->openInputFile(column, inDist);
1268                 
1269                 ofstream outDist;
1270                 string outputFile = column + ".temp";
1271                 m->openOutputFile(outputFile, outDist);
1272                 
1273                 string name1, name2;
1274                 float dist;
1275                 while (!inDist.eof()) {
1276                         if (m->control_pressed) {  inDist.close(); outDist.close(); m->mothurRemove(outputFile); return good;  }
1277                 
1278                         inDist >> name1 >> name2 >> dist; m->gobble(inDist);
1279                         
1280                         //both names are in fasta file and distance is below cutoff
1281                         if ((namesOldFasta.count(name1) == 0) || (namesOldFasta.count(name2) == 0)) {  good = false; break;  }
1282                         else{
1283                                 if (dist <= cutoff) {
1284                                         outDist << name1 << '\t' << name2 << '\t' << dist << endl;
1285                                 }
1286                         }
1287                 }
1288                 
1289                 inDist.close();
1290                 outDist.close();
1291                 
1292                 if (good) {
1293                         m->mothurRemove(column);
1294                         rename(outputFile.c_str(), column.c_str());
1295                 }else{
1296                         m->mothurRemove(outputFile); //temp file is bad because file mismatch above
1297                 }
1298                 
1299                 return good;
1300                 
1301         }
1302         catch(exception& e) {
1303                 m->errorOut(e, "DistanceCommand", "sanityCheck");
1304                 exit(1);
1305         }
1306 }
1307 /**************************************************************************************************/
1308
1309
1310
1311