]> git.donarmstrong.com Git - mothur.git/blob - distancecommand.cpp
a lot of new input parameters in rf module, implemented a pruning system in the rf...
[mothur.git] / distancecommand.cpp
1 /*
2  *  distancecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/7/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "distancecommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> DistanceCommand::setParameters(){        
14         try {
15                 CommandParameter pcolumn("column", "InputTypes", "", "", "none", "none", "OldFastaColumn","column",false,false); parameters.push_back(pcolumn);
16                 CommandParameter poldfasta("oldfasta", "InputTypes", "", "", "none", "none", "OldFastaColumn","",false,false); parameters.push_back(poldfasta);
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","phylip-column",false,true, true); parameters.push_back(pfasta);
18                 CommandParameter poutput("output", "Multiple", "column-lt-square-phylip", "column", "", "", "","phylip-column",false,false, true); parameters.push_back(poutput);
19                 CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "","",false,false); parameters.push_back(pcalc);
20                 CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pcountends);
21                 CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcompress);
22                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false, true); parameters.push_back(pprocessors);
23                 CommandParameter pcutoff("cutoff", "Number", "", "1.0", "", "", "","",false,false, true); parameters.push_back(pcutoff);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "DistanceCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string DistanceCommand::getHelpString(){        
38         try {
39                 string helpString = "";
40                 helpString += "The dist.seqs command reads a file containing sequences and creates a distance file.\n";
41                 helpString += "The dist.seqs command parameters are fasta, oldfasta, column, calc, countends, output, compress, cutoff and processors.  \n";
42                 helpString += "The fasta parameter is required, unless you have a valid current fasta file.\n";
43                 helpString += "The oldfasta and column parameters allow you to append the distances calculated to the column file.\n";
44                 helpString += "The calc parameter allows you to specify the method of calculating the distances.  Your options are: nogaps, onegap or eachgap. The default is onegap.\n";
45                 helpString += "The countends parameter allows you to specify whether to include terminal gaps in distance.  Your options are: T or F. The default is T.\n";
46                 helpString += "The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0.\n";
47                 helpString += "The output parameter allows you to specify format of your distance matrix. Options are column, lt, and square. The default is column.\n";
48                 helpString += "The processors parameter allows you to specify number of processors to use.  The default is 1.\n";
49                 helpString += "The compress parameter allows you to indicate that you want the resulting distance file compressed.  The default is false.\n";
50                 helpString += "The dist.seqs command should be in the following format: \n";
51                 helpString += "dist.seqs(fasta=yourFastaFile, calc=yourCalc, countends=yourEnds, cutoff= yourCutOff, processors=yourProcessors) \n";
52                 helpString += "Example dist.seqs(fasta=amazon.fasta, calc=eachgap, countends=F, cutoff= 2.0, processors=3).\n";
53                 helpString += "Note: No spaces between parameter labels (i.e. calc), '=' and parameters (i.e.yourCalc).\n";
54                 return helpString;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "DistanceCommand", "getHelpString");
58                 exit(1);
59         }
60 }
61 //**********************************************************************************************************************
62 string DistanceCommand::getOutputPattern(string type) {
63     try {
64         string pattern = "";
65         
66         if (type == "phylip") {  pattern = "[filename],[outputtag],dist"; } 
67         else if (type == "column") { pattern = "[filename],dist"; }
68         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
69         
70         return pattern;
71     }
72     catch(exception& e) {
73         m->errorOut(e, "DistanceCommand", "getOutputPattern");
74         exit(1);
75     }
76 }
77 //**********************************************************************************************************************
78 DistanceCommand::DistanceCommand(){     
79         try {
80                 abort = true; calledHelp = true; 
81                 setParameters();
82                 vector<string> tempOutNames;
83                 outputTypes["phylip"] = tempOutNames;
84                 outputTypes["column"] = tempOutNames;
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "DistanceCommand", "DistanceCommand");
88                 exit(1);
89         }
90 }
91 //**********************************************************************************************************************
92 DistanceCommand::DistanceCommand(string option) {
93         try {
94                 abort = false; calledHelp = false;   
95                 Estimators.clear();
96                                 
97                 //allow user to run help
98                 if(option == "help") { help(); abort = true; calledHelp = true; }
99                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
100                 
101                 else {
102                         vector<string> myArray = setParameters();
103                         
104                         OptionParser parser(option);
105                         map<string, string> parameters = parser.getParameters();
106                         
107                         ValidParameters validParameter("dist.seqs");
108                         map<string, string>::iterator it2;
109                 
110                         //check to make sure all parameters are valid for command
111                         for (it2 = parameters.begin(); it2 != parameters.end(); it2++) { 
112                                 if (validParameter.isValidParameter(it2->first, myArray, it2->second) != true) {  abort = true;  }
113                         }
114                         
115                         //initialize outputTypes
116                         vector<string> tempOutNames;
117                         outputTypes["phylip"] = tempOutNames;
118                         outputTypes["column"] = tempOutNames;
119                 
120                         //if the user changes the input directory command factory will send this info to us in the output parameter 
121                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
122                         if (inputDir == "not found"){   inputDir = "";          }
123                         else {
124                                 string path;
125                                 it2 = parameters.find("fasta");
126                                 //user has given a template file
127                                 if(it2 != parameters.end()){ 
128                                         path = m->hasPath(it2->second);
129                                         //if the user has not given a path then, add inputdir. else leave path alone.
130                                         if (path == "") {       parameters["fasta"] = inputDir + it2->second;           }
131                                 }
132                                 
133                                 it2 = parameters.find("oldfasta");
134                                 //user has given a template file
135                                 if(it2 != parameters.end()){ 
136                                         path = m->hasPath(it2->second);
137                                         //if the user has not given a path then, add inputdir. else leave path alone.
138                                         if (path == "") {       parameters["oldfasta"] = inputDir + it2->second;                }
139                                 }
140                                 
141                                 it2 = parameters.find("column");
142                                 //user has given a template file
143                                 if(it2 != parameters.end()){ 
144                                         path = m->hasPath(it2->second);
145                                         //if the user has not given a path then, add inputdir. else leave path alone.
146                                         if (path == "") {       parameters["column"] = inputDir + it2->second;          }
147                                 }
148                         }
149
150                         //check for required parameters
151                         fastafile = validParameter.validFile(parameters, "fasta", true);
152                         if (fastafile == "not found") {                                 
153                                 fastafile = m->getFastaFile(); 
154                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); 
155                                         ifstream inFASTA;
156                                         m->openInputFile(fastafile, inFASTA);
157                                         alignDB = SequenceDB(inFASTA); 
158                                         inFASTA.close();
159                                 }else {         m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
160                         }else if (fastafile == "not open") { abort = true; }    
161                         else{
162                                 ifstream inFASTA;
163                                 m->openInputFile(fastafile, inFASTA);
164                                 alignDB = SequenceDB(inFASTA); 
165                                 inFASTA.close();
166                                 m->setFastaFile(fastafile);
167                         }
168                         
169                         oldfastafile = validParameter.validFile(parameters, "oldfasta", true);
170                         if (oldfastafile == "not found") { oldfastafile = ""; }
171                         else if (oldfastafile == "not open") { abort = true; }  
172                         
173                         column = validParameter.validFile(parameters, "column", true);
174                         if (column == "not found") { column = ""; }
175                         else if (column == "not open") { abort = true; }        
176                         else { m->setColumnFile(column); }
177                         
178                         //if the user changes the output directory command factory will send this info to us in the output parameter 
179                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
180                                 outputDir = ""; 
181                                 outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it       
182                         }
183
184                         //check for optional parameter and set defaults
185                         // ...at some point should added some additional type checking...
186                         calc = validParameter.validFile(parameters, "calc", false);                     
187                         if (calc == "not found") { calc = "onegap";  }
188                         else { 
189                                  if (calc == "default")  {  calc = "onegap";  }
190                         }
191                         m->splitAtDash(calc, Estimators);
192
193                         string temp;
194                         temp = validParameter.validFile(parameters, "countends", false);        if(temp == "not found"){        temp = "T";     }
195                         convert(temp, countends); 
196                         
197                         temp = validParameter.validFile(parameters, "cutoff", false);           if(temp == "not found"){        temp = "1.0"; }
198                         m->mothurConvert(temp, cutoff); 
199                         
200                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
201                         m->setProcessors(temp);
202                         m->mothurConvert(temp, processors);
203                         
204                         temp = validParameter.validFile(parameters, "compress", false);         if(temp == "not found"){  temp = "F"; }
205                         convert(temp, compress);
206
207                         output = validParameter.validFile(parameters, "output", false);         if(output == "not found"){      output = "column"; }
208             if (output == "phylip") { output = "lt";  }
209                         
210                         if (((column != "") && (oldfastafile == "")) || ((column == "") && (oldfastafile != ""))) { m->mothurOut("If you provide column or oldfasta, you must provide both."); m->mothurOutEndLine(); abort=true; }
211                         
212                         if ((column != "") && (oldfastafile != "") && (output != "column")) { m->mothurOut("You have provided column and oldfasta, indicating you want to append distances to your column file. Your output must be in column format to do so."); m->mothurOutEndLine(); abort=true; }
213                         
214                         if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; }
215
216                 }
217                                 
218         }
219         catch(exception& e) {
220                 m->errorOut(e, "DistanceCommand", "DistanceCommand");
221                 exit(1);
222         }
223 }
224 //**********************************************************************************************************************
225
226 int DistanceCommand::execute(){
227         try {
228                 
229                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
230                 
231                 int startTime = time(NULL);
232                 
233                 //save number of new sequence
234                 numNewFasta = alignDB.getNumSeqs();
235                 
236                 //sanity check the oldfasta and column file as well as add oldfasta sequences to alignDB
237                 if ((oldfastafile != "") && (column != ""))  {  if (!(sanityCheck())) { return 0; }  }
238                 
239                 if (m->control_pressed) { return 0; }
240                 
241                 int numSeqs = alignDB.getNumSeqs();
242                 cutoff += 0.005;
243                 
244                 if (!alignDB.sameLength()) {  m->mothurOut("[ERROR]: your sequences are not the same length, aborting."); m->mothurOutEndLine(); return 0; }
245                 
246                 string outputFile;
247         
248         map<string, string> variables; 
249         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
250                 if (output == "lt") { //does the user want lower triangle phylip formatted file 
251             variables["[outputtag]"] = "phylip";
252                         outputFile = getOutputFileName("phylip", variables);
253                         m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile);
254                         
255                         //output numSeqs to phylip formatted dist file
256                 }else if (output == "column") { //user wants column format
257                         outputFile = getOutputFileName("column", variables);
258                         outputTypes["column"].push_back(outputFile);
259                         
260                         //so we don't accidentally overwrite
261                         if (outputFile == column) { 
262                                 string tempcolumn = column + ".old"; 
263                                 rename(column.c_str(), tempcolumn.c_str());
264                         }
265                         
266                         m->mothurRemove(outputFile);
267                 }else { //assume square
268                         variables["[outputtag]"] = "square";
269                         outputFile = getOutputFileName("phylip", variables);
270                         m->mothurRemove(outputFile);
271                         outputTypes["phylip"].push_back(outputFile);
272                 }
273                 
274
275 #ifdef USE_MPI
276                 
277                 int pid, start, end; 
278                 int tag = 2001;
279                                 
280                 MPI_Status status; 
281                 MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
282                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
283                 
284                 //each process gets where it should start and stop in the file
285                 if (output != "square") {
286                         start = int (sqrt(float(pid)/float(processors)) * numSeqs);
287                         end = int (sqrt(float(pid+1)/float(processors)) * numSeqs);
288                 }else{
289                         start = int ((float(pid)/float(processors)) * numSeqs);
290                         end = int ((float(pid+1)/float(processors)) * numSeqs);
291                 }
292                 
293                 if (output == "column") {
294                         MPI_File outMPI;
295                         int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
296
297                         //char* filename = new char[outputFile.length()];
298                         //memcpy(filename, outputFile.c_str(), outputFile.length());
299                         
300                         char filename[1024];
301                         strcpy(filename, outputFile.c_str());
302                         
303                         MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI);
304                         //delete filename;
305
306                         if (pid == 0) { //you are the root process 
307                                 
308                                 //do your part
309                                 string outputMyPart;
310                                 
311                                 driverMPI(start, end, outMPI, cutoff); 
312                                 
313                                 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI);   return 0; }
314                         
315                                 //wait on chidren
316                                 for(int i = 1; i < processors; i++) { 
317                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);    return 0; }
318                                         
319                                         char buf[5];
320                                         MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
321                                 }
322                         }else { //you are a child process
323                                 //do your part
324                                 driverMPI(start, end, outMPI, cutoff); 
325                                 
326                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   return 0; }
327                         
328                                 char buf[5];
329                                 strcpy(buf, "done"); 
330                                 //tell parent you are done.
331                                 MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
332                         }
333                         
334                         MPI_File_close(&outMPI);
335                         
336                 }else { //lower triangle format
337                         if (pid == 0) { //you are the root process 
338                         
339                                 //do your part
340                                 string outputMyPart;
341                                 unsigned long long mySize;
342                                 
343                                 if (output != "square"){ driverMPI(start, end, outputFile, mySize); }
344                                 else { driverMPI(start, end, outputFile, mySize, output); }
345         
346                                 if (m->control_pressed) {  outputTypes.clear();   return 0; }
347                                 
348                                 int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; //
349                                 MPI_File outMPI;
350                                 MPI_File inMPI;
351
352                                 //char* filename = new char[outputFile.length()];
353                                 //memcpy(filename, outputFile.c_str(), outputFile.length());
354                                 
355                                 char filename[1024];
356                                 strcpy(filename, outputFile.c_str());
357
358                                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
359                                 //delete filename;
360
361                                 //wait on chidren
362                                 for(int b = 1; b < processors; b++) { 
363                                         unsigned long long fileSize;
364                                         
365                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   return 0; }
366                                         
367                                         MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); 
368                                         
369                                         string outTemp = outputFile + toString(b) + ".temp";
370
371                                         char* buf = new char[outTemp.length()];
372                                         memcpy(buf, outTemp.c_str(), outTemp.length());
373                                         
374                                         MPI_File_open(MPI_COMM_SELF, buf, MPI_MODE_DELETE_ON_CLOSE|MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);
375                                         delete buf;
376
377                                         int count = 0;
378                                         while (count < fileSize) { 
379                                                 char buf2[1];
380                                                 MPI_File_read(inMPI, buf2, 1, MPI_CHAR, &status);
381                                                 MPI_File_write(outMPI, buf2, 1, MPI_CHAR, &status);
382                                                 count += 1;
383                                         }
384                                         
385                                         MPI_File_close(&inMPI); //deleted on close
386                                 }
387                                 
388                                 MPI_File_close(&outMPI);
389                         }else { //you are a child process
390                                 //do your part
391                                 unsigned long long size;
392                                 if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); }
393                                 else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); }
394                                 
395                                 if (m->control_pressed) {  return 0; }
396                         
397                                 //tell parent you are done.
398                                 MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD);
399                         }
400                 }
401                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
402 #else           
403                                 
404         //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
405                 //if you don't need to fork anything
406                 if(processors == 1){
407                         if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
408                         else { driver(0, numSeqs, outputFile, "square");  }
409                 }else{ //you have multiple processors
410                         
411                         unsigned long long numDists = 0;
412                         
413                         if (output == "square") {
414                                  numDists = numSeqs * numSeqs;
415                         }else {
416                                 for(int i=0;i<numSeqs;i++){
417                                         for(int j=0;j<i;j++){
418                                                 numDists++;
419                                                 if (numDists > processors) { break; }
420                                         }
421                                 }
422                         }
423                         
424                         if (numDists < processors) { processors = numDists; }
425                         
426                         for (int i = 0; i < processors; i++) {
427                                 distlinePair tempLine;
428                                 lines.push_back(tempLine);
429                                 if (output != "square") {
430                                         lines[i].start = int (sqrt(float(i)/float(processors)) * numSeqs);
431                                         lines[i].end = int (sqrt(float(i+1)/float(processors)) * numSeqs);
432                                 }else{
433                                         lines[i].start = int ((float(i)/float(processors)) * numSeqs);
434                                         lines[i].end = int ((float(i+1)/float(processors)) * numSeqs);
435                                 }
436                                 
437                         }
438                         
439                         createProcesses(outputFile); 
440                 }
441         //#else
442                 //ifstream inFASTA;
443                 //if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
444                 //else { driver(0, numSeqs, outputFile, "square");  }
445         //#endif
446         
447 #endif
448                 if (m->control_pressed) { outputTypes.clear();  m->mothurRemove(outputFile); return 0; }
449                 
450                 #ifdef USE_MPI
451                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
452                                         
453                         if (pid == 0) { //only one process should output to screen
454                 #endif
455                 
456                 //if (output == "square") {  convertMatrix(outputFile); }
457                 
458                 ifstream fileHandle;
459                 fileHandle.open(outputFile.c_str());
460                 if(fileHandle) {
461                         m->gobble(fileHandle);
462                         if (fileHandle.eof()) { m->mothurOut(outputFile + " is blank. This can result if there are no distances below your cutoff.");  m->mothurOutEndLine(); }
463                 }
464                 
465                 //append the old column file to the new one
466                 if ((oldfastafile != "") && (column != ""))  {
467                         //we had to rename the column file so we didnt overwrite above, but we want to keep old name
468                         if (outputFile == column) { 
469                                 string tempcolumn = column + ".old";
470                                 m->appendFiles(tempcolumn, outputFile);
471                                 m->mothurRemove(tempcolumn);
472                         }else{
473                                 m->appendFiles(outputFile, column);
474                                 m->mothurRemove(outputFile);
475                                 outputFile = column;
476                         }
477                         
478                         if (outputDir != "") { 
479                                 string newOutputName = outputDir + m->getSimpleName(outputFile);
480                                 rename(outputFile.c_str(), newOutputName.c_str());
481                                 m->mothurRemove(outputFile);
482                                 outputFile = newOutputName;
483                         }
484                 }
485
486                 
487                 #ifdef USE_MPI
488                         }
489                 #endif
490                 
491                 if (m->control_pressed) { outputTypes.clear();  m->mothurRemove(outputFile); return 0; }
492                 
493                 //set phylip file as new current phylipfile
494                 string current = "";
495                 itTypes = outputTypes.find("phylip");
496                 if (itTypes != outputTypes.end()) {
497                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
498                 }
499                 
500                 //set column file as new current columnfile
501                 itTypes = outputTypes.find("column");
502                 if (itTypes != outputTypes.end()) {
503                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
504                 }
505                 
506                 m->mothurOutEndLine();
507                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
508                 m->mothurOut(outputFile); m->mothurOutEndLine();
509                 m->mothurOutEndLine();
510                 m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
511
512
513                 if (m->isTrue(compress)) {
514                         m->mothurOut("Compressing..."); m->mothurOutEndLine();
515                         m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)"); m->mothurOutEndLine();
516                         system(("gzip -v " + outputFile).c_str());
517                         outputNames.push_back(outputFile + ".gz");
518                 }else { outputNames.push_back(outputFile); }
519
520                 return 0;
521                 
522         }
523         catch(exception& e) {
524                 m->errorOut(e, "DistanceCommand", "execute");
525                 exit(1);
526         }
527 }
528 /**************************************************************************************************/
529 void DistanceCommand::createProcesses(string filename) {
530         try {
531 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
532                 int process = 1;
533                 processIDS.clear();
534                 
535                 //loop through and create all the processes you want
536                 while (process != processors) {
537                         int pid = fork();
538                         
539                         if (pid > 0) {
540                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
541                                 process++;
542                         }else if (pid == 0){
543                                 if (output != "square") {  driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", cutoff); }
544                                 else { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", "square"); }
545                                 exit(0);
546                         }else { 
547                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); 
548                                 perror(" : ");
549                                 for (int i=0;i<processIDS.size();i++) {  int temp = processIDS[i]; kill (temp, SIGINT); }
550                                 exit(0);
551                         }
552                 }
553                 
554                 //parent does its part
555                 if (output != "square") {  driver(lines[0].start, lines[0].end, filename, cutoff); }
556                 else { driver(lines[0].start, lines[0].end, filename, "square"); }
557                 
558                 
559                 //force parent to wait until all the processes are done
560                 for (int i=0;i<processIDS.size();i++) { 
561                         int temp = processIDS[i];
562                         wait(&temp);
563                 }
564 #else
565                 //////////////////////////////////////////////////////////////////////////////////////////////////////
566                 //Windows version shared memory, so be careful when passing variables through the distanceData struct. 
567                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
568                 //that's why the distance calculator was moved inside of the driver to make separate copies.
569                 //////////////////////////////////////////////////////////////////////////////////////////////////////
570                 
571                 vector<distanceData*> pDataArray; //[processors-1];
572                 DWORD   dwThreadIdArray[processors-1];
573                 HANDLE  hThreadArray[processors-1]; 
574                 
575                 //Create processor-1 worker threads.
576                 for( int i=0; i<processors-1; i++ ){
577                         
578                         // Allocate memory for thread data.
579                         distanceData* tempDist = new distanceData(lines[i+1].start, lines[i+1].end, (filename + toString(i) + ".temp"), cutoff, alignDB, Estimators, m, output, numNewFasta, countends);
580                         pDataArray.push_back(tempDist);
581                         processIDS.push_back(i);
582                         
583                         //MyDistThreadFunction is in header. It must be global or static to work with the threads.
584                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
585                         hThreadArray[i] = CreateThread(NULL, 0, MyDistThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
586                 }
587                 
588                 //do your part
589                 if (output != "square") {  driver(lines[0].start, lines[0].end, filename, cutoff); }
590                 else { driver(lines[0].start, lines[0].end, filename, "square"); }
591                 
592                 //Wait until all threads have terminated.
593                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
594                 
595                 //Close all thread handles and free memory allocations.
596                 for(int i=0; i < pDataArray.size(); i++){
597             if (pDataArray[i]->count != (pDataArray[i]->endLine-pDataArray[i]->startLine)) {
598                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->endLine-pDataArray[i]->startLine) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
599             }
600                         CloseHandle(hThreadArray[i]);
601                         delete pDataArray[i];
602                 }
603 #endif
604                 
605                 //append and remove temp files
606                 for (int i=0;i<processIDS.size();i++) { 
607                         m->appendFiles((filename + toString(processIDS[i]) + ".temp"), filename);
608                         m->mothurRemove((filename + toString(processIDS[i]) + ".temp"));
609                 }
610                 
611         }
612         catch(exception& e) {
613                 m->errorOut(e, "DistanceCommand", "createProcesses");
614                 exit(1);
615         }
616 }
617 /**************************************************************************************************/
618 /////// need to fix to work with calcs and sequencedb
619 int DistanceCommand::driver(int startLine, int endLine, string dFileName, float cutoff){
620         try {
621                 ValidCalculators validCalculator;
622                 Dist* distCalculator;
623                 if (m->isTrue(countends) == true) {
624                         for (int i=0; i<Estimators.size(); i++) {
625                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
626                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
627                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
628                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
629                                 }
630                         }
631                 }else {
632                         for (int i=0; i<Estimators.size(); i++) {
633                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
634                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
635                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
636                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
637                                 }
638                         }
639                 }
640                 
641                 int startTime = time(NULL);
642                 
643                 //column file
644                 ofstream outFile(dFileName.c_str(), ios::trunc);
645                 outFile.setf(ios::fixed, ios::showpoint);
646                 outFile << setprecision(4);
647                 
648                 if((output == "lt") && startLine == 0){ outFile << alignDB.getNumSeqs() << endl;        }
649                 
650                 for(int i=startLine;i<endLine;i++){
651                         if(output == "lt")      {       
652                                 string name = alignDB.get(i).getName();
653                                 if (name.length() < 10) { //pad with spaces to make compatible
654                                         while (name.length() < 10) {  name += " ";  }
655                                 }
656                                 outFile << name << '\t';        
657                         }
658                         for(int j=0;j<i;j++){
659                                 
660                                 if (m->control_pressed) { delete distCalculator; outFile.close(); return 0;  }
661                                 
662                                 //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file
663                                 //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop
664                                 if ((i >= numNewFasta) && (j >= numNewFasta)) { break; }
665                                 
666                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
667                                 double dist = distCalculator->getDist();
668                                 
669                                 if(dist <= cutoff){
670                                         if (output == "column") { outFile << alignDB.get(i).getName() << ' ' << alignDB.get(j).getName() << ' ' << dist << endl; }
671                                 }
672                                 if (output == "lt") {  outFile << dist << '\t'; }
673                         }
674                         
675                         if (output == "lt") { outFile << endl; }
676                         
677                         if(i % 100 == 0){
678                                 m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n"); 
679                         }
680                         
681                 }
682                 m->mothurOutJustToScreen(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)+"\n");
683                 
684                 outFile.close();
685                 delete distCalculator;
686                 
687                 return 1;
688         }
689         catch(exception& e) {
690                 m->errorOut(e, "DistanceCommand", "driver");
691                 exit(1);
692         }
693 }
694 /**************************************************************************************************/
695 /////// need to fix to work with calcs and sequencedb
696 int DistanceCommand::driver(int startLine, int endLine, string dFileName, string square){
697         try {
698                 ValidCalculators validCalculator;
699                 Dist* distCalculator;
700                 if (m->isTrue(countends) == true) {
701                         for (int i=0; i<Estimators.size(); i++) {
702                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
703                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
704                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
705                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
706                                 }
707                         }
708                 }else {
709                         for (int i=0; i<Estimators.size(); i++) {
710                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
711                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
712                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
713                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
714                                 }
715                         }
716                 }
717                 
718                 int startTime = time(NULL);
719                 
720                 //column file
721                 ofstream outFile(dFileName.c_str(), ios::trunc);
722                 outFile.setf(ios::fixed, ios::showpoint);
723                 outFile << setprecision(4);
724                 
725                 if(startLine == 0){     outFile << alignDB.getNumSeqs() << endl;        }
726                 
727                 for(int i=startLine;i<endLine;i++){
728                                 
729                         string name = alignDB.get(i).getName();
730                         //pad with spaces to make compatible
731                         if (name.length() < 10) { while (name.length() < 10) {  name += " ";  } }
732                                 
733                         outFile << name << '\t';        
734                         
735                         for(int j=0;j<alignDB.getNumSeqs();j++){
736                                 
737                                 if (m->control_pressed) { delete distCalculator; outFile.close(); return 0;  }
738                                 
739                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
740                                 double dist = distCalculator->getDist();
741                                 
742                                 outFile << dist << '\t'; 
743                         }
744                         
745                         outFile << endl; 
746                         
747                         if(i % 100 == 0){
748                                 m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n");
749                         }
750                         
751                 }
752                 m->mothurOutJustToScreen(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)+"\n");
753                 
754                 outFile.close();
755                 delete distCalculator;
756                 
757                 return 1;
758         }
759         catch(exception& e) {
760                 m->errorOut(e, "DistanceCommand", "driver");
761                 exit(1);
762         }
763 }
764 #ifdef USE_MPI
765 /**************************************************************************************************/
766 /////// need to fix to work with calcs and sequencedb
767 int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, float cutoff){
768         try {
769                 
770                 ValidCalculators validCalculator;
771                 Dist* distCalculator;
772                 if (m->isTrue(countends) == true) {
773                         for (int i=0; i<Estimators.size(); i++) {
774                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
775                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
776                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
777                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
778                                 }
779                         }
780                 }else {
781                         for (int i=0; i<Estimators.size(); i++) {
782                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
783                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
784                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
785                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
786                                 }
787                         }
788                 }
789                 
790                 
791                 MPI_Status status;
792                 int startTime = time(NULL);
793                 
794                 string outputString = "";
795                 
796                 for(int i=startLine;i<endLine;i++){
797         
798                         for(int j=0;j<i;j++){
799                                 
800                                 if (m->control_pressed) {  delete distCalculator; return 0;  }
801                                 
802                                 //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file
803                                 //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop
804                                 if ((i >= numNewFasta) && (j >= numNewFasta)) { break; }
805                                 
806                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
807                                 double dist = distCalculator->getDist();
808                                 
809                                 if(dist <= cutoff){
810                                          outputString += (alignDB.get(i).getName() + ' ' + alignDB.get(j).getName() + ' ' + toString(dist) + '\n'); 
811                                 }
812                         }
813                         
814                         if(i % 100 == 0){
815                                 m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n"); 
816                         }
817                         
818                          
819                         //send results to parent
820                         int length = outputString.length();
821
822                         char* buf = new char[length];
823                         memcpy(buf, outputString.c_str(), length);
824                         
825                         MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
826                         outputString = "";
827                         delete buf;
828                         
829                 }
830                 
831                 m->mothurOutJustToScreen(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)+"\n");
832                 delete distCalculator;
833                 return 1;
834         }
835         catch(exception& e) {
836                 m->errorOut(e, "DistanceCommand", "driverMPI");
837                 exit(1);
838         }
839 }
840 /**************************************************************************************************/
841 /////// need to fix to work with calcs and sequencedb
842 int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long long& size){
843         try {
844                 ValidCalculators validCalculator;
845                 Dist* distCalculator;
846                 if (m->isTrue(countends) == true) {
847                         for (int i=0; i<Estimators.size(); i++) {
848                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
849                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
850                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
851                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
852                                 }
853                         }
854                 }else {
855                         for (int i=0; i<Estimators.size(); i++) {
856                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
857                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
858                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
859                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
860                                 }
861                         }
862                 }
863                 
864                 
865                 MPI_Status status;
866                 
867                 MPI_File outMPI;
868                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
869
870                 //char* filename = new char[file.length()];
871                 //memcpy(filename, file.c_str(), file.length());
872                 
873                 char filename[1024];
874                 strcpy(filename, file.c_str());
875
876                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
877                 //delete filename;
878
879                 int startTime = time(NULL);
880                 
881                 string outputString = "";
882                 size = 0;
883                 
884                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
885                 
886                 for(int i=startLine;i<endLine;i++){
887                                 
888                         string name = alignDB.get(i).getName();
889                         if (name.length() < 10) { //pad with spaces to make compatible
890                                 while (name.length() < 10) {  name += " ";  }
891                         }
892                         outputString += name + "\t";    
893                         
894                         for(int j=0;j<i;j++){
895                                 
896                                 if (m->control_pressed) { delete distCalculator; return 0;  }
897                                 
898                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
899                                 double dist = distCalculator->getDist();
900                                 
901                                 outputString += toString(dist) + "\t"; 
902                         }
903                         
904                         outputString += "\n"; 
905
906                 
907                         if(i % 100 == 0){
908                                 m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n");                   }
909                         
910                         
911                         //send results to parent
912                         int length = outputString.length();
913                         char* buf = new char[length];
914                         memcpy(buf, outputString.c_str(), length);
915                         
916                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
917                         size += outputString.length();
918                         outputString = "";
919                         delete buf;
920                 }
921                 
922                 m->mothurOutJustToScreen(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)+"\n");
923                 
924                 MPI_File_close(&outMPI);
925                 delete distCalculator;
926                 
927                 return 1;
928         }
929         catch(exception& e) {
930                 m->errorOut(e, "DistanceCommand", "driverMPI");
931                 exit(1);
932         }
933 }
934 /**************************************************************************************************/
935 /////// need to fix to work with calcs and sequencedb
936 int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long long& size, string square){
937         try {
938                 ValidCalculators validCalculator;
939                 Dist* distCalculator;
940                 if (m->isTrue(countends) == true) {
941                         for (int i=0; i<Estimators.size(); i++) {
942                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
943                                         if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
944                                         else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
945                                         else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
946                                 }
947                         }
948                 }else {
949                         for (int i=0; i<Estimators.size(); i++) {
950                                 if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
951                                         if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
952                                         else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
953                                         else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
954                                 }
955                         }
956                 }
957                 
958                 MPI_Status status;
959                 
960                 MPI_File outMPI;
961                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
962
963                 //char* filename = new char[file.length()];
964                 //memcpy(filename, file.c_str(), file.length());
965                 
966                 char filename[1024];
967                 strcpy(filename, file.c_str());
968
969                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
970                 //delete filename;
971
972                 int startTime = time(NULL);
973                 
974                 string outputString = "";
975                 size = 0;
976                 
977                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
978                 
979                 for(int i=startLine;i<endLine;i++){
980                                 
981                         string name = alignDB.get(i).getName();
982                         if (name.length() < 10) { //pad with spaces to make compatible
983                                 while (name.length() < 10) {  name += " ";  }
984                         }
985                         outputString += name + "\t";    
986                         
987                         for(int j=0;j<alignDB.getNumSeqs();j++){
988                                 
989                                 if (m->control_pressed) { delete distCalculator; return 0;  }
990                                 
991                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
992                                 double dist = distCalculator->getDist();
993                                 
994                                 outputString += toString(dist) + "\t"; 
995                         }
996                         
997                         outputString += "\n"; 
998
999                 
1000                         if(i % 100 == 0){
1001                                 m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n"); 
1002                         }
1003                         
1004                         
1005                         //send results to parent
1006                         int length = outputString.length();
1007                         char* buf = new char[length];
1008                         memcpy(buf, outputString.c_str(), length);
1009                         
1010                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
1011                         size += outputString.length();
1012                         outputString = "";
1013                         delete buf;
1014                 }
1015                 
1016                 m->mothurOutJustToScreen(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)+"\n");
1017                 
1018                 MPI_File_close(&outMPI);
1019                 delete distCalculator;
1020                 return 1;
1021         }
1022         catch(exception& e) {
1023                 m->errorOut(e, "DistanceCommand", "driverMPI");
1024                 exit(1);
1025         }
1026 }
1027 #endif
1028 /**************************************************************************************************
1029 int DistanceCommand::convertMatrix(string outputFile) {
1030         try{
1031
1032                 //sort file by first column so the distances for each row are together
1033                 string outfile = m->getRootName(outputFile) + "sorted.dist.temp";
1034                 
1035                 //use the unix sort 
1036                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1037                         string command = "sort -n " + outputFile + " -o " + outfile;
1038                         system(command.c_str());
1039                 #else //sort using windows sort
1040                         string command = "sort " + outputFile + " /O " + outfile;
1041                         system(command.c_str());
1042                 #endif
1043                 
1044
1045                 //output to new file distance for each row and save positions in file where new row begins
1046                 ifstream in;
1047                 m->openInputFile(outfile, in);
1048                 
1049                 ofstream out;
1050                 m->openOutputFile(outputFile, out);
1051                 
1052                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
1053
1054                 out << alignDB.getNumSeqs() << endl;
1055                 
1056                 //get first currentRow
1057                 string first, currentRow, second;
1058                 float dist;
1059                 map<string, float> rowDists; //take advantage of the fact that maps are already sorted by key 
1060                 map<string, float>::iterator it;
1061                 
1062                 in >> first;
1063                 currentRow = first;
1064                 
1065                 rowDists[first] = 0.00; //distance to yourself is 0.0
1066                 
1067                 in.seekg(0);
1068                 //m->openInputFile(outfile, in);
1069                 
1070                 while(!in.eof()) {
1071                         if (m->control_pressed) { in.close(); m->mothurRemove(outfile); out.close(); return 0; }
1072                         
1073                         in >> first >> second >> dist; m->gobble(in);
1074                                 
1075                         if (first != currentRow) {
1076                                 //print out last row
1077                                 out << currentRow << '\t'; //print name
1078
1079                                 //print dists
1080                                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
1081                                         out << it->second << '\t';
1082                                 }
1083                                 out << endl;
1084                                 
1085                                 //start new row
1086                                 currentRow = first;
1087                                 rowDists.clear();
1088                                 rowDists[first] = 0.00;
1089                                 rowDists[second] = dist;
1090                         }else{
1091                                 rowDists[second] = dist;
1092                         }
1093                 }
1094                 //print out last row
1095                 out << currentRow << '\t'; //print name
1096                                 
1097                 //print dists
1098                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
1099                         out << it->second << '\t';
1100                 }
1101                 out << endl;
1102                 
1103                 in.close();
1104                 out.close();
1105                 
1106                 m->mothurRemove(outfile);
1107                 
1108                 return 1;
1109                 
1110         }
1111         catch(exception& e) {
1112                 m->errorOut(e, "DistanceCommand", "convertMatrix");
1113                 exit(1);
1114         }
1115 }
1116 **************************************************************************************************
1117 int DistanceCommand::convertToLowerTriangle(string outputFile) {
1118         try{
1119
1120                 //sort file by first column so the distances for each row are together
1121                 string outfile = m->getRootName(outputFile) + "sorted.dist.temp";
1122                 
1123                 //use the unix sort 
1124                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1125                         string command = "sort -n " + outputFile + " -o " + outfile;
1126                         system(command.c_str());
1127                 #else //sort using windows sort
1128                         string command = "sort " + outputFile + " /O " + outfile;
1129                         system(command.c_str());
1130                 #endif
1131                 
1132
1133                 //output to new file distance for each row and save positions in file where new row begins
1134                 ifstream in;
1135                 m->openInputFile(outfile, in);
1136                 
1137                 ofstream out;
1138                 m->openOutputFile(outputFile, out);
1139                 
1140                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
1141
1142                 out << alignDB.getNumSeqs() << endl;
1143                 
1144                 //get first currentRow
1145                 string first, currentRow, second;
1146                 float dist;
1147                 int i, j;
1148                 i = 0; j = 0;
1149                 map<string, float> rowDists; //take advantage of the fact that maps are already sorted by key 
1150                 map<string, float>::iterator it;
1151                 
1152                 in >> first;
1153                 currentRow = first;
1154                 
1155                 rowDists[first] = 0.00; //distance to yourself is 0.0
1156                 
1157                 in.seekg(0);
1158                 //m->openInputFile(outfile, in);
1159                 
1160                 while(!in.eof()) {
1161                         if (m->control_pressed) { in.close(); m->mothurRemove(outfile); out.close(); return 0; }
1162                         
1163                         in >> first >> second >> dist; m->gobble(in);
1164                                 
1165                         if (first != currentRow) {
1166                                 //print out last row
1167                                 out << currentRow << '\t'; //print name
1168
1169                                 //print dists
1170                                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
1171                                         if (j >= i) { break; }
1172                                         out << it->second << '\t';
1173                                         j++;
1174                                 }
1175                                 out << endl;
1176                                 
1177                                 //start new row
1178                                 currentRow = first;
1179                                 rowDists.clear();
1180                                 rowDists[first] = 0.00;
1181                                 rowDists[second] = dist;
1182                                 j = 0;
1183                                 i++;
1184                         }else{
1185                                 rowDists[second] = dist;
1186                         }
1187                 }
1188                 //print out last row
1189                 out << currentRow << '\t'; //print name
1190                                 
1191                 //print dists
1192                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
1193                         out << it->second << '\t';
1194                 }
1195                 out << endl;
1196                 
1197                 in.close();
1198                 out.close();
1199                 
1200                 m->mothurRemove(outfile);
1201                 
1202                 return 1;
1203                 
1204         }
1205         catch(exception& e) {
1206                 m->errorOut(e, "DistanceCommand", "convertToLowerTriangle");
1207                 exit(1);
1208         }
1209 }
1210 **************************************************************************************************/
1211 //its okay if the column file does not contain all the names in the fasta file, since some distance may have been above a cutoff,
1212 //but no sequences can be in the column file that are not in oldfasta. also, if a distance is above the cutoff given then remove it.
1213 //also check to make sure the 2 files have the same alignment length.
1214 bool DistanceCommand::sanityCheck() {
1215         try{
1216                 bool good = true;
1217                 
1218                 //make sure the 2 fasta files have the same alignment length
1219                 ifstream in;
1220                 m->openInputFile(fastafile, in);
1221                 int fastaAlignLength = 0;
1222                 if (in) { 
1223                         Sequence tempIn(in);
1224                         fastaAlignLength = tempIn.getAligned().length();
1225                 }
1226                 in.close();
1227                 
1228                 ifstream in2;
1229                 m->openInputFile(oldfastafile, in2);
1230                 int oldfastaAlignLength = 0;
1231                 if (in2) { 
1232                         Sequence tempIn2(in2);
1233                         oldfastaAlignLength = tempIn2.getAligned().length();
1234                 }
1235                 in2.close();
1236                 
1237                 if (fastaAlignLength != oldfastaAlignLength) { m->mothurOut("fasta files do not have the same alignment length."); m->mothurOutEndLine(); return false;  }
1238                 
1239                 //read fasta file and save names as well as adding them to the alignDB
1240                 set<string> namesOldFasta;
1241                 
1242                 ifstream inFasta;
1243                 m->openInputFile(oldfastafile, inFasta);
1244                 
1245                 while (!inFasta.eof()) {
1246                         if (m->control_pressed) {  inFasta.close(); return good;  }
1247                 
1248                         Sequence temp(inFasta);
1249                         
1250                         if (temp.getName() != "") {
1251                                 namesOldFasta.insert(temp.getName());  //save name
1252                                 alignDB.push_back(temp);  //add to DB
1253                         }
1254                         
1255                         m->gobble(inFasta);
1256                 }
1257                 
1258                 inFasta.close();
1259                 
1260                 //read through the column file checking names and removing distances above the cutoff
1261                 ifstream inDist;
1262                 m->openInputFile(column, inDist);
1263                 
1264                 ofstream outDist;
1265                 string outputFile = column + ".temp";
1266                 m->openOutputFile(outputFile, outDist);
1267                 
1268                 string name1, name2;
1269                 float dist;
1270                 while (!inDist.eof()) {
1271                         if (m->control_pressed) {  inDist.close(); outDist.close(); m->mothurRemove(outputFile); return good;  }
1272                 
1273                         inDist >> name1 >> name2 >> dist; m->gobble(inDist);
1274                         
1275                         //both names are in fasta file and distance is below cutoff
1276                         if ((namesOldFasta.count(name1) == 0) || (namesOldFasta.count(name2) == 0)) {  good = false; break;  }
1277                         else{
1278                                 if (dist <= cutoff) {
1279                                         outDist << name1 << '\t' << name2 << '\t' << dist << endl;
1280                                 }
1281                         }
1282                 }
1283                 
1284                 inDist.close();
1285                 outDist.close();
1286                 
1287                 if (good) {
1288                         m->mothurRemove(column);
1289                         rename(outputFile.c_str(), column.c_str());
1290                 }else{
1291                         m->mothurRemove(outputFile); //temp file is bad because file mismatch above
1292                 }
1293                 
1294                 return good;
1295                 
1296         }
1297         catch(exception& e) {
1298                 m->errorOut(e, "DistanceCommand", "sanityCheck");
1299                 exit(1);
1300         }
1301 }
1302 /**************************************************************************************************/
1303
1304
1305
1306