]> git.donarmstrong.com Git - mothur.git/blob - distancecommand.cpp
added phylip as output file type for commands that output distance matrices. added...
[mothur.git] / distancecommand.cpp
1 /*
2  *  distancecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/7/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "distancecommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> DistanceCommand::setParameters(){        
14         try {
15                 CommandParameter pcolumn("column", "InputTypes", "", "", "none", "none", "OldFastaColumn",false,false); parameters.push_back(pcolumn);
16                 CommandParameter poldfasta("oldfasta", "InputTypes", "", "", "none", "none", "OldFastaColumn",false,false); parameters.push_back(poldfasta);
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter poutput("output", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(poutput);
19                 CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "",false,false); parameters.push_back(pcalc);
20                 CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pcountends);
21                 CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcompress);
22                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
23                 CommandParameter pcutoff("cutoff", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pcutoff);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "DistanceCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string DistanceCommand::getHelpString(){        
38         try {
39                 string helpString = "";
40                 helpString += "The dist.seqs command reads a file containing sequences and creates a distance file.\n";
41                 helpString += "The dist.seqs command parameters are fasta, oldfasta, column, calc, countends, output, compress, cutoff and processors.  \n";
42                 helpString += "The fasta parameter is required, unless you have a valid current fasta file.\n";
43                 helpString += "The oldfasta and column parameters allow you to append the distances calculated to the column file.\n";
44                 helpString += "The calc parameter allows you to specify the method of calculating the distances.  Your options are: nogaps, onegap or eachgap. The default is onegap.\n";
45                 helpString += "The countends parameter allows you to specify whether to include terminal gaps in distance.  Your options are: T or F. The default is T.\n";
46                 helpString += "The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0.\n";
47                 helpString += "The output parameter allows you to specify format of your distance matrix. Options are column, lt, and square. The default is column.\n";
48                 helpString += "The processors parameter allows you to specify number of processors to use.  The default is 1.\n";
49                 helpString += "The compress parameter allows you to indicate that you want the resulting distance file compressed.  The default is false.\n";
50                 helpString += "The dist.seqs command should be in the following format: \n";
51                 helpString += "dist.seqs(fasta=yourFastaFile, calc=yourCalc, countends=yourEnds, cutoff= yourCutOff, processors=yourProcessors) \n";
52                 helpString += "Example dist.seqs(fasta=amazon.fasta, calc=eachgap, countends=F, cutoff= 2.0, processors=3).\n";
53                 helpString += "Note: No spaces between parameter labels (i.e. calc), '=' and parameters (i.e.yourCalc).\n";
54                 return helpString;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "DistanceCommand", "getHelpString");
58                 exit(1);
59         }
60 }
61 //**********************************************************************************************************************
62 string DistanceCommand::getOutputFileNameTag(string type, string inputName=""){ 
63         try {
64         string outputFileName = "";
65                 map<string, vector<string> >::iterator it;
66         
67         //is this a type this command creates
68         it = outputTypes.find(type);
69         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
70         else {
71             if (type == "phylip") {  outputFileName =  "dist"; }
72             else if (type == "column") {  outputFileName =  "dist"; }
73             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
74         }
75         return outputFileName;
76         }
77         catch(exception& e) {
78                 m->errorOut(e, "DistanceCommand", "getOutputFileNameTag");
79                 exit(1);
80         }
81 }
82 //**********************************************************************************************************************
83 DistanceCommand::DistanceCommand(){     
84         try {
85                 abort = true; calledHelp = true; 
86                 setParameters();
87                 vector<string> tempOutNames;
88                 outputTypes["phylip"] = tempOutNames;
89                 outputTypes["column"] = tempOutNames;
90         }
91         catch(exception& e) {
92                 m->errorOut(e, "DistanceCommand", "DistanceCommand");
93                 exit(1);
94         }
95 }
96 //**********************************************************************************************************************
97 DistanceCommand::DistanceCommand(string option) {
98         try {
99                 abort = false; calledHelp = false;   
100                 Estimators.clear();
101                                 
102                 //allow user to run help
103                 if(option == "help") { help(); abort = true; calledHelp = true; }
104                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
105                 
106                 else {
107                         vector<string> myArray = setParameters();
108                         
109                         OptionParser parser(option);
110                         map<string, string> parameters = parser.getParameters();
111                         
112                         ValidParameters validParameter("dist.seqs");
113                         map<string, string>::iterator it2;
114                 
115                         //check to make sure all parameters are valid for command
116                         for (it2 = parameters.begin(); it2 != parameters.end(); it2++) { 
117                                 if (validParameter.isValidParameter(it2->first, myArray, it2->second) != true) {  abort = true;  }
118                         }
119                         
120                         //initialize outputTypes
121                         vector<string> tempOutNames;
122                         outputTypes["phylip"] = tempOutNames;
123                         outputTypes["column"] = tempOutNames;
124                 
125                         //if the user changes the input directory command factory will send this info to us in the output parameter 
126                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
127                         if (inputDir == "not found"){   inputDir = "";          }
128                         else {
129                                 string path;
130                                 it2 = parameters.find("fasta");
131                                 //user has given a template file
132                                 if(it2 != parameters.end()){ 
133                                         path = m->hasPath(it2->second);
134                                         //if the user has not given a path then, add inputdir. else leave path alone.
135                                         if (path == "") {       parameters["fasta"] = inputDir + it2->second;           }
136                                 }
137                                 
138                                 it2 = parameters.find("oldfasta");
139                                 //user has given a template file
140                                 if(it2 != parameters.end()){ 
141                                         path = m->hasPath(it2->second);
142                                         //if the user has not given a path then, add inputdir. else leave path alone.
143                                         if (path == "") {       parameters["oldfasta"] = inputDir + it2->second;                }
144                                 }
145                                 
146                                 it2 = parameters.find("column");
147                                 //user has given a template file
148                                 if(it2 != parameters.end()){ 
149                                         path = m->hasPath(it2->second);
150                                         //if the user has not given a path then, add inputdir. else leave path alone.
151                                         if (path == "") {       parameters["column"] = inputDir + it2->second;          }
152                                 }
153                         }
154
155                         //check for required parameters
156                         fastafile = validParameter.validFile(parameters, "fasta", true);
157                         if (fastafile == "not found") {                                 
158                                 fastafile = m->getFastaFile(); 
159                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); 
160                                         ifstream inFASTA;
161                                         m->openInputFile(fastafile, inFASTA);
162                                         alignDB = SequenceDB(inFASTA); 
163                                         inFASTA.close();
164                                 }else {         m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
165                         }else if (fastafile == "not open") { abort = true; }    
166                         else{
167                                 ifstream inFASTA;
168                                 m->openInputFile(fastafile, inFASTA);
169                                 alignDB = SequenceDB(inFASTA); 
170                                 inFASTA.close();
171                                 m->setFastaFile(fastafile);
172                         }
173                         
174                         oldfastafile = validParameter.validFile(parameters, "oldfasta", true);
175                         if (oldfastafile == "not found") { oldfastafile = ""; }
176                         else if (oldfastafile == "not open") { abort = true; }  
177                         
178                         column = validParameter.validFile(parameters, "column", true);
179                         if (column == "not found") { column = ""; }
180                         else if (column == "not open") { abort = true; }        
181                         else { m->setColumnFile(column); }
182                         
183                         //if the user changes the output directory command factory will send this info to us in the output parameter 
184                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
185                                 outputDir = ""; 
186                                 outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it       
187                         }
188
189                         //check for optional parameter and set defaults
190                         // ...at some point should added some additional type checking...
191                         calc = validParameter.validFile(parameters, "calc", false);                     
192                         if (calc == "not found") { calc = "onegap";  }
193                         else { 
194                                  if (calc == "default")  {  calc = "onegap";  }
195                         }
196                         m->splitAtDash(calc, Estimators);
197
198                         string temp;
199                         temp = validParameter.validFile(parameters, "countends", false);        if(temp == "not found"){        temp = "T";     }
200                         convert(temp, countends); 
201                         
202                         temp = validParameter.validFile(parameters, "cutoff", false);           if(temp == "not found"){        temp = "1.0"; }
203                         m->mothurConvert(temp, cutoff); 
204                         
205                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
206                         m->setProcessors(temp);
207                         m->mothurConvert(temp, processors);
208                         
209                         temp = validParameter.validFile(parameters, "compress", false);         if(temp == "not found"){  temp = "F"; }
210                         convert(temp, compress);
211
212                         output = validParameter.validFile(parameters, "output", false);         if(output == "not found"){      output = "column"; }
213             if (output == "phylip") { output = "lt";  }
214                         
215                         if (((column != "") && (oldfastafile == "")) || ((column == "") && (oldfastafile != ""))) { m->mothurOut("If you provide column or oldfasta, you must provide both."); m->mothurOutEndLine(); abort=true; }
216                         
217                         if ((column != "") && (oldfastafile != "") && (output != "column")) { m->mothurOut("You have provided column and oldfasta, indicating you want to append distances to your column file. Your output must be in column format to do so."); m->mothurOutEndLine(); abort=true; }
218                         
219                         if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; }
220
221                 }
222                                 
223         }
224         catch(exception& e) {
225                 m->errorOut(e, "DistanceCommand", "DistanceCommand");
226                 exit(1);
227         }
228 }
229 //**********************************************************************************************************************
230
231 int DistanceCommand::execute(){
232         try {
233                 
234                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
235                 
236                 int startTime = time(NULL);
237                 
238                 //save number of new sequence
239                 numNewFasta = alignDB.getNumSeqs();
240                 
241                 //sanity check the oldfasta and column file as well as add oldfasta sequences to alignDB
242                 if ((oldfastafile != "") && (column != ""))  {  if (!(sanityCheck())) { return 0; }  }
243                 
244                 if (m->control_pressed) { return 0; }
245                 
246                 int numSeqs = alignDB.getNumSeqs();
247                 cutoff += 0.005;
248                 
249                 if (!alignDB.sameLength()) {  m->mothurOut("[ERROR]: your sequences are not the same length, aborting."); m->mothurOutEndLine(); return 0; }
250                 
251                 string outputFile;
252                                 
253                 if (output == "lt") { //does the user want lower triangle phylip formatted file 
254                         outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "phylip." + getOutputFileNameTag("phylip");
255                         m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile);
256                         
257                         //output numSeqs to phylip formatted dist file
258                 }else if (output == "column") { //user wants column format
259                         outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("column");
260                         outputTypes["column"].push_back(outputFile);
261                         
262                         //so we don't accidentally overwrite
263                         if (outputFile == column) { 
264                                 string tempcolumn = column + ".old"; 
265                                 rename(column.c_str(), tempcolumn.c_str());
266                         }
267                         
268                         m->mothurRemove(outputFile);
269                 }else { //assume square
270                         outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "square." + getOutputFileNameTag("phylip");
271                         m->mothurRemove(outputFile);
272                         outputTypes["phylip"].push_back(outputFile);
273                 }
274                 
275
276 #ifdef USE_MPI
277                 
278                 int pid, start, end; 
279                 int tag = 2001;
280                                 
281                 MPI_Status status; 
282                 MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
283                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
284                 
285                 //each process gets where it should start and stop in the file
286                 if (output != "square") {
287                         start = int (sqrt(float(pid)/float(processors)) * numSeqs);
288                         end = int (sqrt(float(pid+1)/float(processors)) * numSeqs);
289                 }else{
290                         start = int ((float(pid)/float(processors)) * numSeqs);
291                         end = int ((float(pid+1)/float(processors)) * numSeqs);
292                 }
293                 
294                 if (output == "column") {
295                         MPI_File outMPI;
296                         int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
297
298                         //char* filename = new char[outputFile.length()];
299                         //memcpy(filename, outputFile.c_str(), outputFile.length());
300                         
301                         char filename[1024];
302                         strcpy(filename, outputFile.c_str());
303                         
304                         MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI);
305                         //delete filename;
306
307                         if (pid == 0) { //you are the root process 
308                                 
309                                 //do your part
310                                 string outputMyPart;
311                                 
312                                 driverMPI(start, end, outMPI, cutoff); 
313                                 
314                                 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI);   return 0; }
315                         
316                                 //wait on chidren
317                                 for(int i = 1; i < processors; i++) { 
318                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);    return 0; }
319                                         
320                                         char buf[5];
321                                         MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
322                                 }
323                         }else { //you are a child process
324                                 //do your part
325                                 driverMPI(start, end, outMPI, cutoff); 
326                                 
327                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   return 0; }
328                         
329                                 char buf[5];
330                                 strcpy(buf, "done"); 
331                                 //tell parent you are done.
332                                 MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
333                         }
334                         
335                         MPI_File_close(&outMPI);
336                         
337                 }else { //lower triangle format
338                         if (pid == 0) { //you are the root process 
339                         
340                                 //do your part
341                                 string outputMyPart;
342                                 unsigned long long mySize;
343                                 
344                                 if (output != "square"){ driverMPI(start, end, outputFile, mySize); }
345                                 else { driverMPI(start, end, outputFile, mySize, output); }
346         
347                                 if (m->control_pressed) {  outputTypes.clear();   return 0; }
348                                 
349                                 int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; //
350                                 MPI_File outMPI;
351                                 MPI_File inMPI;
352
353                                 //char* filename = new char[outputFile.length()];
354                                 //memcpy(filename, outputFile.c_str(), outputFile.length());
355                                 
356                                 char filename[1024];
357                                 strcpy(filename, outputFile.c_str());
358
359                                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
360                                 //delete filename;
361
362                                 //wait on chidren
363                                 for(int b = 1; b < processors; b++) { 
364                                         unsigned long long fileSize;
365                                         
366                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   return 0; }
367                                         
368                                         MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); 
369                                         
370                                         string outTemp = outputFile + toString(b) + ".temp";
371
372                                         char* buf = new char[outTemp.length()];
373                                         memcpy(buf, outTemp.c_str(), outTemp.length());
374                                         
375                                         MPI_File_open(MPI_COMM_SELF, buf, MPI_MODE_DELETE_ON_CLOSE|MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);
376                                         delete buf;
377
378                                         int count = 0;
379                                         while (count < fileSize) { 
380                                                 char buf2[1];
381                                                 MPI_File_read(inMPI, buf2, 1, MPI_CHAR, &status);
382                                                 MPI_File_write(outMPI, buf2, 1, MPI_CHAR, &status);
383                                                 count += 1;
384                                         }
385                                         
386                                         MPI_File_close(&inMPI); //deleted on close
387                                 }
388                                 
389                                 MPI_File_close(&outMPI);
390                         }else { //you are a child process
391                                 //do your part
392                                 unsigned long long size;
393                                 if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); }
394                                 else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); }
395                                 
396                                 if (m->control_pressed) {  return 0; }
397                         
398                                 //tell parent you are done.
399                                 MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD);
400                         }
401                 }
402                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
403 #else           
404                                 
405         //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
406                 //if you don't need to fork anything
407                 if(processors == 1){
408                         if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
409                         else { driver(0, numSeqs, outputFile, "square");  }
410                 }else{ //you have multiple processors
411                         
412                         unsigned long long numDists = 0;
413                         
414                         if (output == "square") {
415                                  numDists = numSeqs * numSeqs;
416                         }else {
417                                 for(int i=0;i<numSeqs;i++){
418                                         for(int j=0;j<i;j++){
419                                                 numDists++;
420                                                 if (numDists > processors) { break; }
421                                         }
422                                 }
423                         }
424                         
425                         if (numDists < processors) { processors = numDists; }
426                         
427                         for (int i = 0; i < processors; i++) {
428                                 distlinePair tempLine;
429                                 lines.push_back(tempLine);
430                                 if (output != "square") {
431                                         lines[i].start = int (sqrt(float(i)/float(processors)) * numSeqs);
432                                         lines[i].end = int (sqrt(float(i+1)/float(processors)) * numSeqs);
433                                 }else{
434                                         lines[i].start = int ((float(i)/float(processors)) * numSeqs);
435                                         lines[i].end = int ((float(i+1)/float(processors)) * numSeqs);
436                                 }
437                                 
438                         }
439                         
440                         createProcesses(outputFile); 
441                 }
442         //#else
443                 //ifstream inFASTA;
444                 //if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
445                 //else { driver(0, numSeqs, outputFile, "square");  }
446         //#endif
447         
448 #endif
449                 if (m->control_pressed) { outputTypes.clear();  m->mothurRemove(outputFile); return 0; }
450                 
451                 #ifdef USE_MPI
452                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
453                                         
454                         if (pid == 0) { //only one process should output to screen
455                 #endif
456                 
457                 //if (output == "square") {  convertMatrix(outputFile); }
458                 
459                 ifstream fileHandle;
460                 fileHandle.open(outputFile.c_str());
461                 if(fileHandle) {
462                         m->gobble(fileHandle);
463                         if (fileHandle.eof()) { m->mothurOut(outputFile + " is blank. This can result if there are no distances below your cutoff.");  m->mothurOutEndLine(); }
464                 }
465                 
466                 //append the old column file to the new one
467                 if ((oldfastafile != "") && (column != ""))  {
468                         //we had to rename the column file so we didnt overwrite above, but we want to keep old name
469                         if (outputFile == column) { 
470                                 string tempcolumn = column + ".old";
471                                 m->appendFiles(tempcolumn, outputFile);
472                                 m->mothurRemove(tempcolumn);
473                         }else{
474                                 m->appendFiles(outputFile, column);
475                                 m->mothurRemove(outputFile);
476                                 outputFile = column;
477                         }
478                         
479                         if (outputDir != "") { 
480                                 string newOutputName = outputDir + m->getSimpleName(outputFile);
481                                 rename(outputFile.c_str(), newOutputName.c_str());
482                                 m->mothurRemove(outputFile);
483                                 outputFile = newOutputName;
484                         }
485                 }
486
487                 
488                 #ifdef USE_MPI
489                         }
490                 #endif
491                 
492                 if (m->control_pressed) { outputTypes.clear();  m->mothurRemove(outputFile); return 0; }
493                 
494                 //set phylip file as new current phylipfile
495                 string current = "";
496                 itTypes = outputTypes.find("phylip");
497                 if (itTypes != outputTypes.end()) {
498                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
499                 }
500                 
501                 //set column file as new current columnfile
502                 itTypes = outputTypes.find("column");
503                 if (itTypes != outputTypes.end()) {
504                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
505                 }
506                 
507                 m->mothurOutEndLine();
508                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
509                 m->mothurOut(outputFile); m->mothurOutEndLine();
510                 m->mothurOutEndLine();
511                 m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
512
513
514                 if (m->isTrue(compress)) {
515                         m->mothurOut("Compressing..."); m->mothurOutEndLine();
516                         m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)"); m->mothurOutEndLine();
517                         system(("gzip -v " + outputFile).c_str());
518                         outputNames.push_back(outputFile + ".gz");
519                 }else { outputNames.push_back(outputFile); }
520
521                 return 0;
522                 
523         }
524         catch(exception& e) {
525                 m->errorOut(e, "DistanceCommand", "execute");
526                 exit(1);
527         }
528 }
529 /**************************************************************************************************/
530 void DistanceCommand::createProcesses(string filename) {
531         try {
532 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
533                 int process = 1;
534                 processIDS.clear();
535                 
536                 //loop through and create all the processes you want
537                 while (process != processors) {
538                         int pid = fork();
539                         
540                         if (pid > 0) {
541                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
542                                 process++;
543                         }else if (pid == 0){
544                                 if (output != "square") {  driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", cutoff); }
545                                 else { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", "square"); }
546                                 exit(0);
547                         }else { 
548                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); 
549                                 perror(" : ");
550                                 for (int i=0;i<processIDS.size();i++) {  int temp = processIDS[i]; kill (temp, SIGINT); }
551                                 exit(0);
552                         }
553                 }
554                 
555                 //parent does its part
556                 if (output != "square") {  driver(lines[0].start, lines[0].end, filename, cutoff); }
557                 else { driver(lines[0].start, lines[0].end, filename, "square"); }
558                 
559                 
560                 //force parent to wait until all the processes are done
561                 for (int i=0;i<processIDS.size();i++) { 
562                         int temp = processIDS[i];
563                         wait(&temp);
564                 }
565 #else
566                 //////////////////////////////////////////////////////////////////////////////////////////////////////
567                 //Windows version shared memory, so be careful when passing variables through the distanceData struct. 
568                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
569                 //that's why the distance calculator was moved inside of the driver to make separate copies.
570                 //////////////////////////////////////////////////////////////////////////////////////////////////////
571                 
572                 vector<distanceData*> pDataArray; //[processors-1];
573                 DWORD   dwThreadIdArray[processors-1];
574                 HANDLE  hThreadArray[processors-1]; 
575                 
576                 //Create processor-1 worker threads.
577                 for( int i=0; i<processors-1; i++ ){
578                         
579                         // Allocate memory for thread data.
580                         distanceData* tempDist = new distanceData(lines[i+1].start, lines[i+1].end, (filename + toString(i) + ".temp"), cutoff, alignDB, Estimators, m, output, numNewFasta, countends);
581                         pDataArray.push_back(tempDist);
582                         processIDS.push_back(i);
583                         
584                         //MyDistThreadFunction is in header. It must be global or static to work with the threads.
585                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
586                         hThreadArray[i] = CreateThread(NULL, 0, MyDistThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
587                 }
588                 
589                 //do your part
590                 if (output != "square") {  driver(lines[0].start, lines[0].end, filename, cutoff); }
591                 else { driver(lines[0].start, lines[0].end, filename, "square"); }
592                 
593                 //Wait until all threads have terminated.
594                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
595                 
596                 //Close all thread handles and free memory allocations.
597                 for(int i=0; i < pDataArray.size(); i++){
598                         CloseHandle(hThreadArray[i]);
599                         delete pDataArray[i];
600                 }
601 #endif
602                 
603                 //append and remove temp files
604                 for (int i=0;i<processIDS.size();i++) { 
605                         m->appendFiles((filename + toString(processIDS[i]) + ".temp"), filename);
606                         m->mothurRemove((filename + toString(processIDS[i]) + ".temp"));
607                 }
608                 
609         }
610         catch(exception& e) {
611                 m->errorOut(e, "DistanceCommand", "createProcesses");
612                 exit(1);
613         }
614 }
615 /**************************************************************************************************/
616 /////// need to fix to work with calcs and sequencedb
617 int DistanceCommand::driver(int startLine, int endLine, string dFileName, float cutoff){
618         try {
619                 ValidCalculators validCalculator;
620                 Dist* distCalculator;
621                 if (m->isTrue(countends) == true) {
622                         for (int i=0; i<Estimators.size(); i++) {
623                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
624                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
625                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
626                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
627                                 }
628                         }
629                 }else {
630                         for (int i=0; i<Estimators.size(); i++) {
631                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
632                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
633                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
634                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
635                                 }
636                         }
637                 }
638                 
639                 int startTime = time(NULL);
640                 
641                 //column file
642                 ofstream outFile(dFileName.c_str(), ios::trunc);
643                 outFile.setf(ios::fixed, ios::showpoint);
644                 outFile << setprecision(4);
645                 
646                 if((output == "lt") && startLine == 0){ outFile << alignDB.getNumSeqs() << endl;        }
647                 
648                 for(int i=startLine;i<endLine;i++){
649                         if(output == "lt")      {       
650                                 string name = alignDB.get(i).getName();
651                                 if (name.length() < 10) { //pad with spaces to make compatible
652                                         while (name.length() < 10) {  name += " ";  }
653                                 }
654                                 outFile << name << '\t';        
655                         }
656                         for(int j=0;j<i;j++){
657                                 
658                                 if (m->control_pressed) { delete distCalculator; outFile.close(); return 0;  }
659                                 
660                                 //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file
661                                 //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop
662                                 if ((i >= numNewFasta) && (j >= numNewFasta)) { break; }
663                                 
664                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
665                                 double dist = distCalculator->getDist();
666                                 
667                                 if(dist <= cutoff){
668                                         if (output == "column") { outFile << alignDB.get(i).getName() << ' ' << alignDB.get(j).getName() << ' ' << dist << endl; }
669                                 }
670                                 if (output == "lt") {  outFile << dist << '\t'; }
671                         }
672                         
673                         if (output == "lt") { outFile << endl; }
674                         
675                         if(i % 100 == 0){
676                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
677                         }
678                         
679                 }
680                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
681                 
682                 outFile.close();
683                 delete distCalculator;
684                 
685                 return 1;
686         }
687         catch(exception& e) {
688                 m->errorOut(e, "DistanceCommand", "driver");
689                 exit(1);
690         }
691 }
692 /**************************************************************************************************/
693 /////// need to fix to work with calcs and sequencedb
694 int DistanceCommand::driver(int startLine, int endLine, string dFileName, string square){
695         try {
696                 ValidCalculators validCalculator;
697                 Dist* distCalculator;
698                 if (m->isTrue(countends) == true) {
699                         for (int i=0; i<Estimators.size(); i++) {
700                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
701                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
702                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
703                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
704                                 }
705                         }
706                 }else {
707                         for (int i=0; i<Estimators.size(); i++) {
708                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
709                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
710                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
711                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
712                                 }
713                         }
714                 }
715                 
716                 int startTime = time(NULL);
717                 
718                 //column file
719                 ofstream outFile(dFileName.c_str(), ios::trunc);
720                 outFile.setf(ios::fixed, ios::showpoint);
721                 outFile << setprecision(4);
722                 
723                 if(startLine == 0){     outFile << alignDB.getNumSeqs() << endl;        }
724                 
725                 for(int i=startLine;i<endLine;i++){
726                                 
727                         string name = alignDB.get(i).getName();
728                         //pad with spaces to make compatible
729                         if (name.length() < 10) { while (name.length() < 10) {  name += " ";  } }
730                                 
731                         outFile << name << '\t';        
732                         
733                         for(int j=0;j<alignDB.getNumSeqs();j++){
734                                 
735                                 if (m->control_pressed) { delete distCalculator; outFile.close(); return 0;  }
736                                 
737                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
738                                 double dist = distCalculator->getDist();
739                                 
740                                 outFile << dist << '\t'; 
741                         }
742                         
743                         outFile << endl; 
744                         
745                         if(i % 100 == 0){
746                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
747                         }
748                         
749                 }
750                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
751                 
752                 outFile.close();
753                 delete distCalculator;
754                 
755                 return 1;
756         }
757         catch(exception& e) {
758                 m->errorOut(e, "DistanceCommand", "driver");
759                 exit(1);
760         }
761 }
762 #ifdef USE_MPI
763 /**************************************************************************************************/
764 /////// need to fix to work with calcs and sequencedb
765 int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, float cutoff){
766         try {
767                 
768                 ValidCalculators validCalculator;
769                 Dist* distCalculator;
770                 if (m->isTrue(countends) == true) {
771                         for (int i=0; i<Estimators.size(); i++) {
772                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
773                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
774                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
775                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
776                                 }
777                         }
778                 }else {
779                         for (int i=0; i<Estimators.size(); i++) {
780                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
781                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
782                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
783                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
784                                 }
785                         }
786                 }
787                 
788                 
789                 MPI_Status status;
790                 int startTime = time(NULL);
791                 
792                 string outputString = "";
793                 
794                 for(int i=startLine;i<endLine;i++){
795         
796                         for(int j=0;j<i;j++){
797                                 
798                                 if (m->control_pressed) {  delete distCalculator; return 0;  }
799                                 
800                                 //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file
801                                 //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop
802                                 if ((i >= numNewFasta) && (j >= numNewFasta)) { break; }
803                                 
804                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
805                                 double dist = distCalculator->getDist();
806                                 
807                                 if(dist <= cutoff){
808                                          outputString += (alignDB.get(i).getName() + ' ' + alignDB.get(j).getName() + ' ' + toString(dist) + '\n'); 
809                                 }
810                         }
811                         
812                         if(i % 100 == 0){
813                                 //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
814                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
815                         }
816                         
817                          
818                         //send results to parent
819                         int length = outputString.length();
820
821                         char* buf = new char[length];
822                         memcpy(buf, outputString.c_str(), length);
823                         
824                         MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
825                         outputString = "";
826                         delete buf;
827                         
828                 }
829                 
830                 //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
831                 cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;        
832                 delete distCalculator;
833                 return 1;
834         }
835         catch(exception& e) {
836                 m->errorOut(e, "DistanceCommand", "driverMPI");
837                 exit(1);
838         }
839 }
840 /**************************************************************************************************/
841 /////// need to fix to work with calcs and sequencedb
842 int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long long& size){
843         try {
844                 ValidCalculators validCalculator;
845                 Dist* distCalculator;
846                 if (m->isTrue(countends) == true) {
847                         for (int i=0; i<Estimators.size(); i++) {
848                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
849                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
850                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
851                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
852                                 }
853                         }
854                 }else {
855                         for (int i=0; i<Estimators.size(); i++) {
856                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
857                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
858                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
859                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
860                                 }
861                         }
862                 }
863                 
864                 
865                 MPI_Status status;
866                 
867                 MPI_File outMPI;
868                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
869
870                 //char* filename = new char[file.length()];
871                 //memcpy(filename, file.c_str(), file.length());
872                 
873                 char filename[1024];
874                 strcpy(filename, file.c_str());
875
876                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
877                 //delete filename;
878
879                 int startTime = time(NULL);
880                 
881                 string outputString = "";
882                 size = 0;
883                 
884                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
885                 
886                 for(int i=startLine;i<endLine;i++){
887                                 
888                         string name = alignDB.get(i).getName();
889                         if (name.length() < 10) { //pad with spaces to make compatible
890                                 while (name.length() < 10) {  name += " ";  }
891                         }
892                         outputString += name + "\t";    
893                         
894                         for(int j=0;j<i;j++){
895                                 
896                                 if (m->control_pressed) { delete distCalculator; return 0;  }
897                                 
898                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
899                                 double dist = distCalculator->getDist();
900                                 
901                                 outputString += toString(dist) + "\t"; 
902                         }
903                         
904                         outputString += "\n"; 
905
906                 
907                         if(i % 100 == 0){
908                                 //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
909                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
910                         }
911                         
912                         
913                         //send results to parent
914                         int length = outputString.length();
915                         char* buf = new char[length];
916                         memcpy(buf, outputString.c_str(), length);
917                         
918                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
919                         size += outputString.length();
920                         outputString = "";
921                         delete buf;
922                 }
923                 
924                 //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
925                 cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;
926                 MPI_File_close(&outMPI);
927                 delete distCalculator;
928                 
929                 return 1;
930         }
931         catch(exception& e) {
932                 m->errorOut(e, "DistanceCommand", "driverMPI");
933                 exit(1);
934         }
935 }
936 /**************************************************************************************************/
937 /////// need to fix to work with calcs and sequencedb
938 int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long long& size, string square){
939         try {
940                 ValidCalculators validCalculator;
941                 Dist* distCalculator;
942                 if (m->isTrue(countends) == true) {
943                         for (int i=0; i<Estimators.size(); i++) {
944                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
945                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
946                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
947                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
948                                 }
949                         }
950                 }else {
951                         for (int i=0; i<Estimators.size(); i++) {
952                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
953                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
954                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
955                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
956                                 }
957                         }
958                 }
959                 
960                 MPI_Status status;
961                 
962                 MPI_File outMPI;
963                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
964
965                 //char* filename = new char[file.length()];
966                 //memcpy(filename, file.c_str(), file.length());
967                 
968                 char filename[1024];
969                 strcpy(filename, file.c_str());
970
971                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
972                 //delete filename;
973
974                 int startTime = time(NULL);
975                 
976                 string outputString = "";
977                 size = 0;
978                 
979                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
980                 
981                 for(int i=startLine;i<endLine;i++){
982                                 
983                         string name = alignDB.get(i).getName();
984                         if (name.length() < 10) { //pad with spaces to make compatible
985                                 while (name.length() < 10) {  name += " ";  }
986                         }
987                         outputString += name + "\t";    
988                         
989                         for(int j=0;j<alignDB.getNumSeqs();j++){
990                                 
991                                 if (m->control_pressed) { delete distCalculator; return 0;  }
992                                 
993                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
994                                 double dist = distCalculator->getDist();
995                                 
996                                 outputString += toString(dist) + "\t"; 
997                         }
998                         
999                         outputString += "\n"; 
1000
1001                 
1002                         if(i % 100 == 0){
1003                                 //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
1004                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
1005                         }
1006                         
1007                         
1008                         //send results to parent
1009                         int length = outputString.length();
1010                         char* buf = new char[length];
1011                         memcpy(buf, outputString.c_str(), length);
1012                         
1013                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
1014                         size += outputString.length();
1015                         outputString = "";
1016                         delete buf;
1017                 }
1018                 
1019                 //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
1020                 cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;
1021                 MPI_File_close(&outMPI);
1022                 delete distCalculator;
1023                 return 1;
1024         }
1025         catch(exception& e) {
1026                 m->errorOut(e, "DistanceCommand", "driverMPI");
1027                 exit(1);
1028         }
1029 }
1030 #endif
1031 /**************************************************************************************************
1032 int DistanceCommand::convertMatrix(string outputFile) {
1033         try{
1034
1035                 //sort file by first column so the distances for each row are together
1036                 string outfile = m->getRootName(outputFile) + "sorted.dist.temp";
1037                 
1038                 //use the unix sort 
1039                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1040                         string command = "sort -n " + outputFile + " -o " + outfile;
1041                         system(command.c_str());
1042                 #else //sort using windows sort
1043                         string command = "sort " + outputFile + " /O " + outfile;
1044                         system(command.c_str());
1045                 #endif
1046                 
1047
1048                 //output to new file distance for each row and save positions in file where new row begins
1049                 ifstream in;
1050                 m->openInputFile(outfile, in);
1051                 
1052                 ofstream out;
1053                 m->openOutputFile(outputFile, out);
1054                 
1055                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
1056
1057                 out << alignDB.getNumSeqs() << endl;
1058                 
1059                 //get first currentRow
1060                 string first, currentRow, second;
1061                 float dist;
1062                 map<string, float> rowDists; //take advantage of the fact that maps are already sorted by key 
1063                 map<string, float>::iterator it;
1064                 
1065                 in >> first;
1066                 currentRow = first;
1067                 
1068                 rowDists[first] = 0.00; //distance to yourself is 0.0
1069                 
1070                 in.seekg(0);
1071                 //m->openInputFile(outfile, in);
1072                 
1073                 while(!in.eof()) {
1074                         if (m->control_pressed) { in.close(); m->mothurRemove(outfile); out.close(); return 0; }
1075                         
1076                         in >> first >> second >> dist; m->gobble(in);
1077                                 
1078                         if (first != currentRow) {
1079                                 //print out last row
1080                                 out << currentRow << '\t'; //print name
1081
1082                                 //print dists
1083                                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
1084                                         out << it->second << '\t';
1085                                 }
1086                                 out << endl;
1087                                 
1088                                 //start new row
1089                                 currentRow = first;
1090                                 rowDists.clear();
1091                                 rowDists[first] = 0.00;
1092                                 rowDists[second] = dist;
1093                         }else{
1094                                 rowDists[second] = dist;
1095                         }
1096                 }
1097                 //print out last row
1098                 out << currentRow << '\t'; //print name
1099                                 
1100                 //print dists
1101                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
1102                         out << it->second << '\t';
1103                 }
1104                 out << endl;
1105                 
1106                 in.close();
1107                 out.close();
1108                 
1109                 m->mothurRemove(outfile);
1110                 
1111                 return 1;
1112                 
1113         }
1114         catch(exception& e) {
1115                 m->errorOut(e, "DistanceCommand", "convertMatrix");
1116                 exit(1);
1117         }
1118 }
1119 **************************************************************************************************
1120 int DistanceCommand::convertToLowerTriangle(string outputFile) {
1121         try{
1122
1123                 //sort file by first column so the distances for each row are together
1124                 string outfile = m->getRootName(outputFile) + "sorted.dist.temp";
1125                 
1126                 //use the unix sort 
1127                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1128                         string command = "sort -n " + outputFile + " -o " + outfile;
1129                         system(command.c_str());
1130                 #else //sort using windows sort
1131                         string command = "sort " + outputFile + " /O " + outfile;
1132                         system(command.c_str());
1133                 #endif
1134                 
1135
1136                 //output to new file distance for each row and save positions in file where new row begins
1137                 ifstream in;
1138                 m->openInputFile(outfile, in);
1139                 
1140                 ofstream out;
1141                 m->openOutputFile(outputFile, out);
1142                 
1143                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
1144
1145                 out << alignDB.getNumSeqs() << endl;
1146                 
1147                 //get first currentRow
1148                 string first, currentRow, second;
1149                 float dist;
1150                 int i, j;
1151                 i = 0; j = 0;
1152                 map<string, float> rowDists; //take advantage of the fact that maps are already sorted by key 
1153                 map<string, float>::iterator it;
1154                 
1155                 in >> first;
1156                 currentRow = first;
1157                 
1158                 rowDists[first] = 0.00; //distance to yourself is 0.0
1159                 
1160                 in.seekg(0);
1161                 //m->openInputFile(outfile, in);
1162                 
1163                 while(!in.eof()) {
1164                         if (m->control_pressed) { in.close(); m->mothurRemove(outfile); out.close(); return 0; }
1165                         
1166                         in >> first >> second >> dist; m->gobble(in);
1167                                 
1168                         if (first != currentRow) {
1169                                 //print out last row
1170                                 out << currentRow << '\t'; //print name
1171
1172                                 //print dists
1173                                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
1174                                         if (j >= i) { break; }
1175                                         out << it->second << '\t';
1176                                         j++;
1177                                 }
1178                                 out << endl;
1179                                 
1180                                 //start new row
1181                                 currentRow = first;
1182                                 rowDists.clear();
1183                                 rowDists[first] = 0.00;
1184                                 rowDists[second] = dist;
1185                                 j = 0;
1186                                 i++;
1187                         }else{
1188                                 rowDists[second] = dist;
1189                         }
1190                 }
1191                 //print out last row
1192                 out << currentRow << '\t'; //print name
1193                                 
1194                 //print dists
1195                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
1196                         out << it->second << '\t';
1197                 }
1198                 out << endl;
1199                 
1200                 in.close();
1201                 out.close();
1202                 
1203                 m->mothurRemove(outfile);
1204                 
1205                 return 1;
1206                 
1207         }
1208         catch(exception& e) {
1209                 m->errorOut(e, "DistanceCommand", "convertToLowerTriangle");
1210                 exit(1);
1211         }
1212 }
1213 **************************************************************************************************/
1214 //its okay if the column file does not contain all the names in the fasta file, since some distance may have been above a cutoff,
1215 //but no sequences can be in the column file that are not in oldfasta. also, if a distance is above the cutoff given then remove it.
1216 //also check to make sure the 2 files have the same alignment length.
1217 bool DistanceCommand::sanityCheck() {
1218         try{
1219                 bool good = true;
1220                 
1221                 //make sure the 2 fasta files have the same alignment length
1222                 ifstream in;
1223                 m->openInputFile(fastafile, in);
1224                 int fastaAlignLength = 0;
1225                 if (in) { 
1226                         Sequence tempIn(in);
1227                         fastaAlignLength = tempIn.getAligned().length();
1228                 }
1229                 in.close();
1230                 
1231                 ifstream in2;
1232                 m->openInputFile(oldfastafile, in2);
1233                 int oldfastaAlignLength = 0;
1234                 if (in2) { 
1235                         Sequence tempIn2(in2);
1236                         oldfastaAlignLength = tempIn2.getAligned().length();
1237                 }
1238                 in2.close();
1239                 
1240                 if (fastaAlignLength != oldfastaAlignLength) { m->mothurOut("fasta files do not have the same alignment length."); m->mothurOutEndLine(); return false;  }
1241                 
1242                 //read fasta file and save names as well as adding them to the alignDB
1243                 set<string> namesOldFasta;
1244                 
1245                 ifstream inFasta;
1246                 m->openInputFile(oldfastafile, inFasta);
1247                 
1248                 while (!inFasta.eof()) {
1249                         if (m->control_pressed) {  inFasta.close(); return good;  }
1250                 
1251                         Sequence temp(inFasta);
1252                         
1253                         if (temp.getName() != "") {
1254                                 namesOldFasta.insert(temp.getName());  //save name
1255                                 alignDB.push_back(temp);  //add to DB
1256                         }
1257                         
1258                         m->gobble(inFasta);
1259                 }
1260                 
1261                 inFasta.close();
1262                 
1263                 //read through the column file checking names and removing distances above the cutoff
1264                 ifstream inDist;
1265                 m->openInputFile(column, inDist);
1266                 
1267                 ofstream outDist;
1268                 string outputFile = column + ".temp";
1269                 m->openOutputFile(outputFile, outDist);
1270                 
1271                 string name1, name2;
1272                 float dist;
1273                 while (!inDist.eof()) {
1274                         if (m->control_pressed) {  inDist.close(); outDist.close(); m->mothurRemove(outputFile); return good;  }
1275                 
1276                         inDist >> name1 >> name2 >> dist; m->gobble(inDist);
1277                         
1278                         //both names are in fasta file and distance is below cutoff
1279                         if ((namesOldFasta.count(name1) == 0) || (namesOldFasta.count(name2) == 0)) {  good = false; break;  }
1280                         else{
1281                                 if (dist <= cutoff) {
1282                                         outDist << name1 << '\t' << name2 << '\t' << dist << endl;
1283                                 }
1284                         }
1285                 }
1286                 
1287                 inDist.close();
1288                 outDist.close();
1289                 
1290                 if (good) {
1291                         m->mothurRemove(column);
1292                         rename(outputFile.c_str(), column.c_str());
1293                 }else{
1294                         m->mothurRemove(outputFile); //temp file is bad because file mismatch above
1295                 }
1296                 
1297                 return good;
1298                 
1299         }
1300         catch(exception& e) {
1301                 m->errorOut(e, "DistanceCommand", "sanityCheck");
1302                 exit(1);
1303         }
1304 }
1305 /**************************************************************************************************/
1306
1307
1308
1309