]> git.donarmstrong.com Git - mothur.git/blob - distancecommand.cpp
moved utilities out of mothur.h and into mothurOut class.
[mothur.git] / distancecommand.cpp
1 /*
2  *  distancecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/7/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "distancecommand.h"
11 #include "ignoregaps.h"
12 #include "eachgapdist.h"
13 #include "eachgapignore.h"
14 #include "onegapdist.h"
15 #include "onegapignore.h"
16
17 //**********************************************************************************************************************
18
19 DistanceCommand::DistanceCommand(string option) {
20         try {
21                 abort = false;
22                 Estimators.clear();
23                                 
24                 //allow user to run help
25                 if(option == "help") { help(); abort = true; }
26                 
27                 else {
28                         //valid paramters for this command
29                         string Array[] =  {"fasta","oldfasta","column", "output", "calc", "countends", "cutoff", "processors", "outputdir","inputdir"};
30                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
31                         
32                         OptionParser parser(option);
33                         map<string, string> parameters = parser.getParameters();
34                         
35                         ValidParameters validParameter("dist.seqs");
36                         map<string, string>::iterator it2;
37                 
38                         //check to make sure all parameters are valid for command
39                         for (it2 = parameters.begin(); it2 != parameters.end(); it2++) { 
40                                 if (validParameter.isValidParameter(it2->first, myArray, it2->second) != true) {  abort = true;  }
41                         }
42                         
43                         //if the user changes the input directory command factory will send this info to us in the output parameter 
44                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
45                         if (inputDir == "not found"){   inputDir = "";          }
46                         else {
47                                 string path;
48                                 it2 = parameters.find("fasta");
49                                 //user has given a template file
50                                 if(it2 != parameters.end()){ 
51                                         path = m->hasPath(it2->second);
52                                         //if the user has not given a path then, add inputdir. else leave path alone.
53                                         if (path == "") {       parameters["fasta"] = inputDir + it2->second;           }
54                                 }
55                                 
56                                 it2 = parameters.find("oldfasta");
57                                 //user has given a template file
58                                 if(it2 != parameters.end()){ 
59                                         path = m->hasPath(it2->second);
60                                         //if the user has not given a path then, add inputdir. else leave path alone.
61                                         if (path == "") {       parameters["oldfasta"] = inputDir + it2->second;                }
62                                 }
63                                 
64                                 it2 = parameters.find("column");
65                                 //user has given a template file
66                                 if(it2 != parameters.end()){ 
67                                         path = m->hasPath(it2->second);
68                                         //if the user has not given a path then, add inputdir. else leave path alone.
69                                         if (path == "") {       parameters["column"] = inputDir + it2->second;          }
70                                 }
71                         }
72
73                         //check for required parameters
74                         fastafile = validParameter.validFile(parameters, "fasta", true);
75                         if (fastafile == "not found") { m->mothurOut("fasta is a required parameter for the dist.seqs command."); m->mothurOutEndLine(); abort = true; }
76                         else if (fastafile == "not open") { abort = true; }     
77                         else{
78                                 ifstream inFASTA;
79                                 m->openInputFile(fastafile, inFASTA);
80                                 alignDB = SequenceDB(inFASTA); 
81                                 inFASTA.close();
82                         }
83                         
84                         oldfastafile = validParameter.validFile(parameters, "oldfasta", true);
85                         if (oldfastafile == "not found") { oldfastafile = ""; }
86                         else if (oldfastafile == "not open") { abort = true; }  
87                         
88                         column = validParameter.validFile(parameters, "column", true);
89                         if (column == "not found") { column = ""; }
90                         else if (column == "not open") { abort = true; }        
91                         
92                         //if the user changes the output directory command factory will send this info to us in the output parameter 
93                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
94                                 outputDir = ""; 
95                                 outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it       
96                         }
97
98                         //check for optional parameter and set defaults
99                         // ...at some point should added some additional type checking...
100                         calc = validParameter.validFile(parameters, "calc", false);                     
101                         if (calc == "not found") { calc = "onegap";  }
102                         else { 
103                                  if (calc == "default")  {  calc = "onegap";  }
104                         }
105                         m->splitAtDash(calc, Estimators);
106
107                         string temp;
108                         temp = validParameter.validFile(parameters, "countends", false);        if(temp == "not found"){        temp = "T";     }
109                         convert(temp, countends); 
110                         
111                         temp = validParameter.validFile(parameters, "cutoff", false);           if(temp == "not found"){        temp = "1.0"; }
112                         convert(temp, cutoff); 
113                         
114                         temp = validParameter.validFile(parameters, "processors", false);       if(temp == "not found"){        temp = "1"; }
115                         convert(temp, processors); 
116                         
117                         output = validParameter.validFile(parameters, "output", false);         if(output == "not found"){      output = "column"; }
118                         
119                         if (((column != "") && (oldfastafile == "")) || ((column == "") && (oldfastafile != ""))) { m->mothurOut("If you provide column or oldfasta, you must provide both."); m->mothurOutEndLine(); abort=true; }
120                         
121                         if ((column != "") && (oldfastafile != "") && (output != "column")) { m->mothurOut("You have provided column and oldfasta, indicating you want to append distances to your column file. Your output must be in column format to do so."); m->mothurOutEndLine(); abort=true; }
122                         
123                         if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; }
124                         
125                         ValidCalculators validCalculator;
126                         
127                         if (m->isTrue(countends) == true) {
128                                 for (int i=0; i<Estimators.size(); i++) {
129                                         if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
130                                                 if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
131                                                 else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
132                                                 else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
133                                         }
134                                 }
135                         }else {
136                                 for (int i=0; i<Estimators.size(); i++) {
137                                         if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
138                                                 if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
139                                                 else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
140                                                 else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
141                                         }
142                                 }
143                         }
144
145                 }
146                                 
147         }
148         catch(exception& e) {
149                 m->errorOut(e, "DistanceCommand", "DistanceCommand");
150                 exit(1);
151         }
152 }
153
154 //**********************************************************************************************************************
155
156 DistanceCommand::~DistanceCommand(){
157         
158         for(int i=0;i<lines.size();i++){
159                 delete lines[i];
160         }
161         
162 }
163         
164 //**********************************************************************************************************************
165
166 void DistanceCommand::help(){
167         try {
168                 m->mothurOut("The dist.seqs command reads a file containing sequences and creates a distance file.\n");
169                 m->mothurOut("The dist.seqs command parameters are fasta, oldfasta, column, calc, countends, output, cutoff and processors.  \n");
170                 m->mothurOut("The fasta parameter is required.\n");
171                 m->mothurOut("The oldfasta and column parameters allow you to append the distances calculated to the column file.\n");
172                 m->mothurOut("The calc parameter allows you to specify the method of calculating the distances.  Your options are: nogaps, onegap or eachgap. The default is onegap.\n");
173                 m->mothurOut("The countends parameter allows you to specify whether to include terminal gaps in distance.  Your options are: T or F. The default is T.\n");
174                 m->mothurOut("The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0.\n");
175                 m->mothurOut("The output parameter allows you to specify format of your distance matrix. Options are column, lt, and square. The default is column.\n");
176                 m->mothurOut("The processors parameter allows you to specify number of processors to use.  The default is 1.\n");
177                 m->mothurOut("The dist.seqs command should be in the following format: \n");
178                 m->mothurOut("dist.seqs(fasta=yourFastaFile, calc=yourCalc, countends=yourEnds, cutoff= yourCutOff, processors=yourProcessors) \n");
179                 m->mothurOut("Example dist.seqs(fasta=amazon.fasta, calc=eachgap, countends=F, cutoff= 2.0, processors=3).\n");
180                 m->mothurOut("Note: No spaces between parameter labels (i.e. calc), '=' and parameters (i.e.yourCalc).\n\n");
181         }
182         catch(exception& e) {
183                 m->errorOut(e, "DistanceCommand", "help");
184                 exit(1);
185         }
186 }
187 //**********************************************************************************************************************
188
189 int DistanceCommand::execute(){
190         try {
191                 
192                 if (abort == true) { return 0; }
193                 
194                 int startTime = time(NULL);
195                 
196                 //save number of new sequence
197                 numNewFasta = alignDB.getNumSeqs();
198                 
199                 //sanity check the oldfasta and column file as well as add oldfasta sequences to alignDB
200                 if ((oldfastafile != "") && (column != ""))  {  if (!(sanityCheck())) { return 0; }  }
201                 
202                 if (m->control_pressed) { return 0; }
203                 
204                 int numSeqs = alignDB.getNumSeqs();
205                 cutoff += 0.005;
206                 
207                 string outputFile;
208                                 
209                 if (output == "lt") { //does the user want lower triangle phylip formatted file 
210                         outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "phylip.dist";
211                         remove(outputFile.c_str());
212                         
213                         //output numSeqs to phylip formatted dist file
214                 }else if (output == "column") { //user wants column format
215                         outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist";
216                         
217                         //so we don't accidentally overwrite
218                         if (outputFile == column) { 
219                                 string tempcolumn = column + ".old"; 
220                                 rename(column.c_str(), tempcolumn.c_str());
221                         }
222                         
223                         remove(outputFile.c_str());
224                 }else { //assume square
225                         outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "square.dist";
226                         remove(outputFile.c_str());
227                 }
228                 
229
230 #ifdef USE_MPI
231                 
232                 int pid, start, end; 
233                 int tag = 2001;
234                                 
235                 MPI_Status status; 
236                 MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
237                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
238                 
239                 //each process gets where it should start and stop in the file
240                 start = int (sqrt(float(pid)/float(processors)) * numSeqs);
241                 end = int (sqrt(float(pid+1)/float(processors)) * numSeqs);
242                 
243                 if (output != "lt") {
244                         MPI_File outMPI;
245                         int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
246
247                         //char* filename = new char[outputFile.length()];
248                         //memcpy(filename, outputFile.c_str(), outputFile.length());
249                         
250                         char filename[1024];
251                         strcpy(filename, outputFile.c_str());
252                         
253                         MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI);
254                         //delete filename;
255
256                         if (pid == 0) { //you are the root process 
257                         
258                                 //do your part
259                                 string outputMyPart;
260                                 driverMPI(start, end, outMPI, cutoff);
261                                 
262                                 if (m->control_pressed) { MPI_File_close(&outMPI);  delete distCalculator;  return 0; }
263                         
264                                 //wait on chidren
265                                 for(int i = 1; i < processors; i++) { 
266                                         if (m->control_pressed) { MPI_File_close(&outMPI);  delete distCalculator;  return 0; }
267                                         
268                                         char buf[4];
269                                         MPI_Recv(buf, 4, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
270                                 }
271                         }else { //you are a child process
272                                 //do your part
273                                 driverMPI(start, end, outMPI, cutoff);
274                                 
275                                 if (m->control_pressed) { MPI_File_close(&outMPI);  delete distCalculator;  return 0; }
276                         
277                                 char buf[4];
278                                 strcpy(buf, "done"); 
279                                 //tell parent you are done.
280                                 MPI_Send(buf, 4, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
281                         }
282                         
283                         MPI_File_close(&outMPI);
284                         
285                 }else { //lower triangle format
286                         if (pid == 0) { //you are the root process 
287                         
288                                 //do your part
289                                 string outputMyPart;
290                                 long mySize;
291                                 driverMPI(start, end, outputFile, mySize);
292         
293                                 if (m->control_pressed) {  delete distCalculator;  return 0; }
294                                 
295                                 int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; //
296                                 MPI_File outMPI;
297                                 MPI_File inMPI;
298
299                                 //char* filename = new char[outputFile.length()];
300                                 //memcpy(filename, outputFile.c_str(), outputFile.length());
301                                 
302                                 char filename[1024];
303                                 strcpy(filename, outputFile.c_str());
304
305                                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
306                                 //delete filename;
307
308                                 //wait on chidren
309                                 for(int b = 1; b < processors; b++) { 
310                                         long fileSize;
311                                         
312                                         if (m->control_pressed) { MPI_File_close(&outMPI);  delete distCalculator;  return 0; }
313                                         
314                                         MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); 
315                                         
316                                         string outTemp = outputFile + toString(b) + ".temp";
317
318                                         char* buf = new char[outTemp.length()];
319                                         memcpy(buf, outTemp.c_str(), outTemp.length());
320                                         
321                                         MPI_File_open(MPI_COMM_SELF, buf, MPI_MODE_DELETE_ON_CLOSE|MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);
322                                         delete buf;
323
324                                         int count = 0;
325                                         while (count < fileSize) { 
326                                                 char buf2[1];
327                                                 MPI_File_read(inMPI, buf2, 1, MPI_CHAR, &status);
328                                                 MPI_File_write(outMPI, buf2, 1, MPI_CHAR, &status);
329                                                 count += 1;
330                                         }
331                                         
332                                         MPI_File_close(&inMPI); //deleted on close
333                                 }
334                                 
335                                 MPI_File_close(&outMPI);
336                         }else { //you are a child process
337                                 //do your part
338                                 long size;
339                                 driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size);
340                                 
341                                 if (m->control_pressed) { delete distCalculator;  return 0; }
342                         
343                                 //tell parent you are done.
344                                 MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD);
345                         }
346                 }
347                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
348 #else           
349                                 
350         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
351                 //if you don't need to fork anything
352                 if(processors == 1){
353                         driver(0, numSeqs, outputFile, cutoff);
354                 }else{ //you have multiple processors
355                         
356                         for (int i = 0; i < processors; i++) {
357                                 lines.push_back(new linePair());
358                                 lines[i]->start = int (sqrt(float(i)/float(processors)) * numSeqs);
359                                 lines[i]->end = int (sqrt(float(i+1)/float(processors)) * numSeqs);
360                         }
361
362                         createProcesses(outputFile); 
363                 
364                         map<int, int>::iterator it = processIDS.begin();
365                         rename((outputFile + toString(it->second) + ".temp").c_str(), outputFile.c_str());
366                         it++;
367                         
368                         //append and remove temp files
369                         for (; it != processIDS.end(); it++) {
370                                 m->appendFiles((outputFile + toString(it->second) + ".temp"), outputFile);
371                                 remove((outputFile + toString(it->second) + ".temp").c_str());
372                         }
373                 }
374         #else
375                 ifstream inFASTA;
376                 driver(0, numSeqs, outputFile, cutoff);
377         #endif
378         
379 #endif
380                 if (m->control_pressed) { delete distCalculator; remove(outputFile.c_str()); return 0; }
381                 
382                 #ifdef USE_MPI
383                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
384                                         
385                         if (pid == 0) { //only one process should output to screen
386                 #endif
387                 
388                 if (output == "square") {  convertMatrix(outputFile); }
389                 
390                 ifstream fileHandle;
391                 fileHandle.open(outputFile.c_str());
392                 if(fileHandle) {
393                         m->gobble(fileHandle);
394                         if (fileHandle.eof()) { m->mothurOut(outputFile + " is blank. This can result if there are no distances below your cutoff.");  m->mothurOutEndLine(); }
395                 }
396                 
397                 //append the old column file to the new one
398                 if ((oldfastafile != "") && (column != ""))  {
399                         //we had to rename the column file so we didnt overwrite above, but we want to keep old name
400                         if (outputFile == column) { 
401                                 string tempcolumn = column + ".old";
402                                 m->appendFiles(tempcolumn, outputFile);
403                                 remove(tempcolumn.c_str());
404                         }else{
405                                 m->appendFiles(outputFile, column);
406                                 remove(outputFile.c_str());
407                                 outputFile = column;
408                         }
409                 }
410
411                 
412                 #ifdef USE_MPI
413                         }
414                 #endif
415                 
416                 if (m->control_pressed) { delete distCalculator; remove(outputFile.c_str()); return 0; }
417                 
418                 delete distCalculator;
419                 
420                 m->mothurOutEndLine();
421                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
422                 m->mothurOut(outputFile); m->mothurOutEndLine();
423                 m->mothurOutEndLine();
424                 m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
425                 return 0;
426                 
427         }
428         catch(exception& e) {
429                 m->errorOut(e, "DistanceCommand", "execute");
430                 exit(1);
431         }
432 }
433 /**************************************************************************************************/
434 void DistanceCommand::createProcesses(string filename) {
435         try {
436 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
437                 int process = 0;
438                 processIDS.clear();
439                 
440                 //loop through and create all the processes you want
441                 while (process != processors) {
442                         int pid = fork();
443                         
444                         if (pid > 0) {
445                                 processIDS[lines[process]->end] = pid;  //create map from line number to pid so you can append files in correct order later
446                                 process++;
447                         }else if (pid == 0){
448                                 driver(lines[process]->start, lines[process]->end, filename + toString(getpid()) + ".temp", cutoff);
449                                 exit(0);
450                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
451                 }
452         
453                 //force parent to wait until all the processes are done
454                 for (map<int, int>::iterator it = processIDS.begin(); it != processIDS.end(); it++) { 
455                         int temp = it->second;
456                         wait(&temp);
457                 }
458 #endif
459         }
460         catch(exception& e) {
461                 m->errorOut(e, "DistanceCommand", "createProcesses");
462                 exit(1);
463         }
464 }
465
466 /**************************************************************************************************/
467 /////// need to fix to work with calcs and sequencedb
468 int DistanceCommand::driver(int startLine, int endLine, string dFileName, float cutoff){
469         try {
470
471                 int startTime = time(NULL);
472                 
473                 //column file
474                 ofstream outFile(dFileName.c_str(), ios::trunc);
475                 outFile.setf(ios::fixed, ios::showpoint);
476                 outFile << setprecision(4);
477                 
478                 if((output == "lt") && startLine == 0){ outFile << alignDB.getNumSeqs() << endl;        }
479                 
480                 for(int i=startLine;i<endLine;i++){
481                         if(output == "lt")      {       
482                                 string name = alignDB.get(i).getName();
483                                 if (name.length() < 10) { //pad with spaces to make compatible
484                                         while (name.length() < 10) {  name += " ";  }
485                                 }
486                                 outFile << name << '\t';        
487                         }
488                         for(int j=0;j<i;j++){
489                                 
490                                 if (m->control_pressed) { outFile.close(); return 0;  }
491                                 
492                                 //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file
493                                 //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop
494                                 if ((i >= numNewFasta) && (j >= numNewFasta)) { break; }
495                                 
496                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
497                                 double dist = distCalculator->getDist();
498                                 
499                                 if(dist <= cutoff){
500                                         if (output == "column") { outFile << alignDB.get(i).getName() << ' ' << alignDB.get(j).getName() << ' ' << dist << endl; }
501                                 }
502                                 if (output == "lt") {  outFile << dist << '\t'; }
503                                 
504                                 if (output == "square") { //make a square column you can convert to square phylip
505                                         outFile << alignDB.get(i).getName() << '\t' << alignDB.get(j).getName() << '\t' << dist << endl;
506                                         outFile << alignDB.get(j).getName() << '\t' << alignDB.get(i).getName() << '\t' << dist << endl;
507                                 }
508
509                         }
510                         
511                         if (output == "lt") { outFile << endl; }
512                         
513                         if(i % 100 == 0){
514                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
515                         }
516                         
517                 }
518                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
519                 
520                 outFile.close();
521                 
522                 return 1;
523         }
524         catch(exception& e) {
525                 m->errorOut(e, "DistanceCommand", "driver");
526                 exit(1);
527         }
528 }
529 #ifdef USE_MPI
530 /**************************************************************************************************/
531 /////// need to fix to work with calcs and sequencedb
532 int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, float cutoff){
533         try {
534                 MPI_Status status;
535                 int startTime = time(NULL);
536                 
537                 string outputString = "";
538                 
539                 for(int i=startLine;i<endLine;i++){
540         
541                         for(int j=0;j<i;j++){
542                                 
543                                 if (m->control_pressed) {  return 0;  }
544                                 
545                                 //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file
546                                 //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop
547                                 if ((i >= numNewFasta) && (j >= numNewFasta)) { break; }
548                                 
549                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
550                                 double dist = distCalculator->getDist();
551                                 
552                                 if(dist <= cutoff){
553                                         if (output == "column") { outputString += (alignDB.get(i).getName() + ' ' + alignDB.get(j).getName() + ' ' + toString(dist) + '\n'); }
554                                 }
555                                 
556                                 if (output == "square") { //make a square column you can convert to square phylip
557                                         outputString += (alignDB.get(i).getName() + ' ' + alignDB.get(j).getName() + ' ' + toString(dist) + '\n');
558                                         outputString += (alignDB.get(j).getName() + ' ' + alignDB.get(i).getName() + ' ' + toString(dist) + '\n');
559                                 }
560                         }
561                         
562                         if(i % 100 == 0){
563                                 //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
564                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
565                         }
566                         
567                          
568                         //send results to parent
569                         int length = outputString.length();
570
571                         char* buf = new char[length];
572                         memcpy(buf, outputString.c_str(), length);
573                         
574                         MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
575                         outputString = "";
576                         delete buf;
577                         
578                 }
579                 
580                 //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
581                 cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;                
582                 return 1;
583         }
584         catch(exception& e) {
585                 m->errorOut(e, "DistanceCommand", "driverMPI");
586                 exit(1);
587         }
588 }
589 /**************************************************************************************************/
590 /////// need to fix to work with calcs and sequencedb
591 int DistanceCommand::driverMPI(int startLine, int endLine, string file, long& size){
592         try {
593                 MPI_Status status;
594                 
595                 MPI_File outMPI;
596                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
597
598                 //char* filename = new char[file.length()];
599                 //memcpy(filename, file.c_str(), file.length());
600                 
601                 char filename[1024];
602                 strcpy(filename, file.c_str());
603
604                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
605                 //delete filename;
606
607                 int startTime = time(NULL);
608                 
609                 string outputString = "";
610                 size = 0;
611                 
612                 if((output == "lt") && startLine == 0){ outputString += toString(alignDB.getNumSeqs()) + "\n";  }
613                 
614                 for(int i=startLine;i<endLine;i++){
615                         if(output == "lt")      {       
616                                 string name = alignDB.get(i).getName();
617                                 if (name.length() < 10) { //pad with spaces to make compatible
618                                         while (name.length() < 10) {  name += " ";  }
619                                 }
620                                 outputString += name + "\t";    
621                         }
622                         for(int j=0;j<i;j++){
623                                 
624                                 if (m->control_pressed) {  return 0;  }
625                                 
626                                 distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
627                                 double dist = distCalculator->getDist();
628                                 
629                                 if (output == "lt") {  outputString += toString(dist) + "\t"; }
630                         }
631                         
632                         if (output == "lt") { outputString += "\n"; }
633
634                 
635                         if(i % 100 == 0){
636                                 //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
637                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
638                         }
639                         
640                         
641                         //send results to parent
642                         int length = outputString.length();
643                         char* buf = new char[length];
644                         memcpy(buf, outputString.c_str(), length);
645                         
646                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
647                         size += outputString.length();
648                         outputString = "";
649                         delete buf;
650                 }
651                 
652                 //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
653                 cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;
654                 MPI_File_close(&outMPI);
655                 
656                 return 1;
657         }
658         catch(exception& e) {
659                 m->errorOut(e, "DistanceCommand", "driverMPI");
660                 exit(1);
661         }
662 }
663 #endif
664 /**************************************************************************************************/
665 int DistanceCommand::convertMatrix(string outputFile) {
666         try{
667
668                 //sort file by first column so the distances for each row are together
669                 string outfile = m->getRootName(outputFile) + "sorted.dist.temp";
670                 
671                 //use the unix sort 
672                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
673                         string command = "sort -n " + outputFile + " -o " + outfile;
674                         system(command.c_str());
675                 #else //sort using windows sort
676                         string command = "sort " + outputFile + " /O " + outfile;
677                         system(command.c_str());
678                 #endif
679                 
680
681                 //output to new file distance for each row and save positions in file where new row begins
682                 ifstream in;
683                 m->openInputFile(outfile, in);
684                 
685                 ofstream out;
686                 m->openOutputFile(outputFile, out);
687                 
688                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
689
690                 out << alignDB.getNumSeqs() << endl;
691                 
692                 //get first currentRow
693                 string first, currentRow, second;
694                 float dist;
695                 map<string, float> rowDists; //take advantage of the fact that maps are already sorted by key 
696                 map<string, float>::iterator it;
697                 
698                 in >> first;
699                 currentRow = first;
700                 
701                 rowDists[first] = 0.00; //distance to yourself is 0.0
702                 
703                 in.seekg(0);
704                 //m->openInputFile(outfile, in);
705                 
706                 while(!in.eof()) {
707                         if (m->control_pressed) { in.close(); remove(outfile.c_str()); out.close(); return 0; }
708                         
709                         in >> first >> second >> dist; m->gobble(in);
710                                 
711                         if (first != currentRow) {
712                                 //print out last row
713                                 out << currentRow << '\t'; //print name
714
715                                 //print dists
716                                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
717                                         out << it->second << '\t';
718                                 }
719                                 out << endl;
720                                 
721                                 //start new row
722                                 currentRow = first;
723                                 rowDists.clear();
724                                 rowDists[first] = 0.00;
725                                 rowDists[second] = dist;
726                         }else{
727                                 rowDists[second] = dist;
728                         }
729                 }
730                 //print out last row
731                 out << currentRow << '\t'; //print name
732                                 
733                 //print dists
734                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
735                         out << it->second << '\t';
736                 }
737                 out << endl;
738                 
739                 in.close();
740                 out.close();
741                 
742                 remove(outfile.c_str());
743                 
744                 return 1;
745                 
746         }
747         catch(exception& e) {
748                 m->errorOut(e, "DistanceCommand", "convertMatrix");
749                 exit(1);
750         }
751 }
752 /**************************************************************************************************/
753 int DistanceCommand::convertToLowerTriangle(string outputFile) {
754         try{
755
756                 //sort file by first column so the distances for each row are together
757                 string outfile = m->getRootName(outputFile) + "sorted.dist.temp";
758                 
759                 //use the unix sort 
760                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
761                         string command = "sort -n " + outputFile + " -o " + outfile;
762                         system(command.c_str());
763                 #else //sort using windows sort
764                         string command = "sort " + outputFile + " /O " + outfile;
765                         system(command.c_str());
766                 #endif
767                 
768
769                 //output to new file distance for each row and save positions in file where new row begins
770                 ifstream in;
771                 m->openInputFile(outfile, in);
772                 
773                 ofstream out;
774                 m->openOutputFile(outputFile, out);
775                 
776                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
777
778                 out << alignDB.getNumSeqs() << endl;
779                 
780                 //get first currentRow
781                 string first, currentRow, second;
782                 float dist;
783                 int i, j;
784                 i = 0; j = 0;
785                 map<string, float> rowDists; //take advantage of the fact that maps are already sorted by key 
786                 map<string, float>::iterator it;
787                 
788                 in >> first;
789                 currentRow = first;
790                 
791                 rowDists[first] = 0.00; //distance to yourself is 0.0
792                 
793                 in.seekg(0);
794                 //m->openInputFile(outfile, in);
795                 
796                 while(!in.eof()) {
797                         if (m->control_pressed) { in.close(); remove(outfile.c_str()); out.close(); return 0; }
798                         
799                         in >> first >> second >> dist; m->gobble(in);
800                                 
801                         if (first != currentRow) {
802                                 //print out last row
803                                 out << currentRow << '\t'; //print name
804
805                                 //print dists
806                                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
807                                         if (j >= i) { break; }
808                                         out << it->second << '\t';
809                                         j++;
810                                 }
811                                 out << endl;
812                                 
813                                 //start new row
814                                 currentRow = first;
815                                 rowDists.clear();
816                                 rowDists[first] = 0.00;
817                                 rowDists[second] = dist;
818                                 j = 0;
819                                 i++;
820                         }else{
821                                 rowDists[second] = dist;
822                         }
823                 }
824                 //print out last row
825                 out << currentRow << '\t'; //print name
826                                 
827                 //print dists
828                 for (it = rowDists.begin(); it != rowDists.end(); it++) {
829                         out << it->second << '\t';
830                 }
831                 out << endl;
832                 
833                 in.close();
834                 out.close();
835                 
836                 remove(outfile.c_str());
837                 
838                 return 1;
839                 
840         }
841         catch(exception& e) {
842                 m->errorOut(e, "DistanceCommand", "convertToLowerTriangle");
843                 exit(1);
844         }
845 }
846 /**************************************************************************************************/
847 //its okay if the column file does not contain all the names in the fasta file, since some distance may have been above a cutoff,
848 //but no sequences can be in the column file that are not in oldfasta. also, if a distance is above the cutoff given then remove it.
849 //also check to make sure the 2 files have the same alignment length.
850 bool DistanceCommand::sanityCheck() {
851         try{
852                 bool good = true;
853                 
854                 //make sure the 2 fasta files have the same alignment length
855                 ifstream in;
856                 m->openInputFile(fastafile, in);
857                 int fastaAlignLength = 0;
858                 if (in) { 
859                         Sequence tempIn(in);
860                         fastaAlignLength = tempIn.getAligned().length();
861                 }
862                 in.close();
863                 
864                 ifstream in2;
865                 m->openInputFile(oldfastafile, in2);
866                 int oldfastaAlignLength = 0;
867                 if (in2) { 
868                         Sequence tempIn2(in2);
869                         oldfastaAlignLength = tempIn2.getAligned().length();
870                 }
871                 in2.close();
872                 
873                 if (fastaAlignLength != oldfastaAlignLength) { m->mothurOut("fasta files do not have the same alignment length."); m->mothurOutEndLine(); return false;  }
874                 
875                 //read fasta file and save names as well as adding them to the alignDB
876                 set<string> namesOldFasta;
877                 
878                 ifstream inFasta;
879                 m->openInputFile(oldfastafile, inFasta);
880                 
881                 while (!inFasta.eof()) {
882                         if (m->control_pressed) {  inFasta.close(); return good;  }
883                 
884                         Sequence temp(inFasta);
885                         
886                         if (temp.getName() != "") {
887                                 namesOldFasta.insert(temp.getName());  //save name
888                                 alignDB.push_back(temp);  //add to DB
889                         }
890                         
891                         m->gobble(inFasta);
892                 }
893                 
894                 inFasta.close();
895                 
896                 //read through the column file checking names and removing distances above the cutoff
897                 ifstream inDist;
898                 m->openInputFile(column, inDist);
899                 
900                 ofstream outDist;
901                 string outputFile = column + ".temp";
902                 m->openOutputFile(outputFile, outDist);
903                 
904                 string name1, name2;
905                 float dist;
906                 while (!inDist.eof()) {
907                         if (m->control_pressed) {  inDist.close(); outDist.close(); remove(outputFile.c_str()); return good;  }
908                 
909                         inDist >> name1 >> name2 >> dist; m->gobble(inDist);
910                         
911                         //both names are in fasta file and distance is below cutoff
912                         if ((namesOldFasta.count(name1) == 0) || (namesOldFasta.count(name2) == 0)) {  good = false; break;  }
913                         else{
914                                 if (dist <= cutoff) {
915                                         outDist << name1 << '\t' << name2 << '\t' << dist << endl;
916                                 }
917                         }
918                 }
919                 
920                 inDist.close();
921                 outDist.close();
922                 
923                 if (good) {
924                         remove(column.c_str());
925                         rename(outputFile.c_str(), column.c_str());
926                 }else{
927                         remove(outputFile.c_str()); //temp file is bad because file mismatch above
928                 }
929                 
930         }
931         catch(exception& e) {
932                 m->errorOut(e, "DistanceCommand", "m->appendFiles");
933                 exit(1);
934         }
935 }
936
937 /**************************************************************************************************
938 void DistanceCommand::m->appendFiles(string temp, string filename) {
939         try{
940                 ofstream output;
941                 ifstream input;
942         
943                 //open output file in append mode
944                 m->openOutputFileAppend(filename, output);
945                 m->openInputFile(temp, input);
946                 
947                 while(char c = input.get()){
948                         if(input.eof())         {       break;                  }
949                         else                            {       output << c;    }
950                 }
951                 
952                 input.close();
953                 output.close();
954         }
955         catch(exception& e) {
956                 m->errorOut(e, "DistanceCommand", "m->appendFiles");
957                 exit(1);
958         }
959 }
960 /**************************************************************************************************/
961
962