]> git.donarmstrong.com Git - mothur.git/blob - pairwiseseqscommand.cpp
added pairwise.seqs
[mothur.git] / pairwiseseqscommand.cpp
1 /*
2  *  pairwiseseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/20/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "pairwiseseqscommand.h"
11 #include "sequence.hpp"
12
13 #include "gotohoverlap.hpp"
14 #include "needlemanoverlap.hpp"
15 #include "blastalign.hpp"
16 #include "noalign.hpp"
17 #include "nast.hpp"
18
19 #include "ignoregaps.h"
20 #include "eachgapdist.h"
21 #include "eachgapignore.h"
22 #include "onegapdist.h"
23 #include "onegapignore.h"
24
25
26 //**********************************************************************************************************************
27 vector<string> PairwiseSeqsCommand::getValidParameters(){       
28         try {
29                 string AlignArray[] =  {"fasta","align","match","mismatch","gapopen","gapextend", "processors","calc","compress","cutoff","countends","output","outputdir","inputdir"};
30                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "PairwiseSeqsCommand", "getValidParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 vector<string> PairwiseSeqsCommand::getRequiredParameters(){    
40         try {
41                 string AlignArray[] =  {"fasta"};
42                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
43                 return myArray;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "PairwiseSeqsCommand", "getRequiredParameters");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 vector<string> PairwiseSeqsCommand::getRequiredFiles(){ 
52         try {
53                 vector<string> myArray;
54                 return myArray;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "PairwiseSeqsCommand", "getRequiredFiles");
58                 exit(1);
59         }
60 }
61 //**********************************************************************************************************************
62 PairwiseSeqsCommand::PairwiseSeqsCommand(){     
63         try {
64                 //initialize outputTypes
65                 vector<string> tempOutNames;
66                 outputTypes["phylip"] = tempOutNames;
67                 outputTypes["column"] = tempOutNames;
68         }
69         catch(exception& e) {
70                 m->errorOut(e, "PairwiseSeqsCommand", "PairwiseSeqsCommand");
71                 exit(1);
72         }
73 }
74 //**********************************************************************************************************************
75 PairwiseSeqsCommand::PairwiseSeqsCommand(string option)  {
76         try {
77                 abort = false;
78         
79                 //allow user to run help
80                 if(option == "help") { help(); abort = true; }
81                 
82                 else {
83                         
84                         //valid paramters for this command
85                         string AlignArray[] =  {"fasta","align","match","mismatch","gapopen","gapextend", "processors","cutoff","compress","calc","countends","output","outputdir","inputdir"};
86                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
87                         
88                         OptionParser parser(option);
89                         map<string, string> parameters = parser.getParameters(); 
90                         
91                         ValidParameters validParameter("pairwise.seqs");
92                         map<string, string>::iterator it;
93                         
94                         //check to make sure all parameters are valid for command
95                         for (it = parameters.begin(); it != parameters.end(); it++) { 
96                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
97                         }
98                         
99                         //initialize outputTypes
100                         vector<string> tempOutNames;
101                         outputTypes["phylip"] = tempOutNames;
102                         outputTypes["column"] = tempOutNames;
103                         
104                         //if the user changes the output directory command factory will send this info to us in the output parameter 
105                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
106                         
107
108                         //if the user changes the input directory command factory will send this info to us in the output parameter 
109                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
110                         
111                         if (inputDir == "not found"){   inputDir = "";          }
112
113                         fastaFileName = validParameter.validFile(parameters, "fasta", false);
114                         if (fastaFileName == "not found") { m->mothurOut("fasta is a required parameter for the pairwise.seqs command."); m->mothurOutEndLine(); abort = true;  }
115                         else { 
116                                 m->splitAtDash(fastaFileName, fastaFileNames);
117                                 
118                                 //go through files and make sure they are good, if not, then disregard them
119                                 for (int i = 0; i < fastaFileNames.size(); i++) {
120                                         
121                                         if (inputDir != "") {
122                                                 string path = m->hasPath(fastaFileNames[i]);
123                                                 //if the user has not given a path then, add inputdir. else leave path alone.
124                                                 if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
125                                         }
126         
127                                         int ableToOpen;
128                                         ifstream in;
129
130                                         ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
131                                 
132                                         //if you can't open it, try default location
133                                         if (ableToOpen == 1) {
134                                                 if (m->getDefaultPath() != "") { //default path is set
135                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
136                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
137                                                         ableToOpen = m->openInputFile(tryPath, in, "noerror");
138                                                         fastaFileNames[i] = tryPath;
139                                                 }
140                                         }
141                                         
142                                         //if you can't open it, try output location
143                                         if (ableToOpen == 1) {
144                                                 if (m->getOutputDir() != "") { //default path is set
145                                                         string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
146                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
147                                                         ableToOpen = m->openInputFile(tryPath, in, "noerror");
148                                                         fastaFileNames[i] = tryPath;
149                                                 }
150                                         }
151                                         
152                                         in.close();                                     
153
154                                         if (ableToOpen == 1) { 
155                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
156                                                 //erase from file list
157                                                 fastaFileNames.erase(fastaFileNames.begin()+i);
158                                                 i--;
159                                         }
160                                 }
161                                 
162                                 //make sure there is at least one valid file left
163                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
164                         }
165                 
166                         //check for optional parameter and set defaults
167                         // ...at some point should added some additional type checking...
168                         string temp;
169                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
170                         convert(temp, match);  
171                         
172                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
173                         convert(temp, misMatch);  
174                         
175                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
176                         convert(temp, gapOpen);  
177                         
178                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
179                         convert(temp, gapExtend); 
180                         
181                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
182                         convert(temp, processors); 
183                         
184                         temp = validParameter.validFile(parameters, "cutoff", false);           if(temp == "not found"){        temp = "1.0"; }
185                         convert(temp, cutoff); 
186                         
187                         temp = validParameter.validFile(parameters, "countends", false);        if(temp == "not found"){        temp = "T";     }
188                         countends = m->isTrue(temp); 
189                         
190                         temp = validParameter.validFile(parameters, "compress", false);         if(temp == "not found"){  temp = "F"; }
191                         compress = m->isTrue(temp); 
192                         
193                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
194                         
195                         output = validParameter.validFile(parameters, "output", false);         if(output == "not found"){      output = "column"; }
196                         if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; }
197                         
198                         calc = validParameter.validFile(parameters, "calc", false);                     
199                         if (calc == "not found") { calc = "onegap";  }
200                         else { 
201                                  if (calc == "default")  {  calc = "onegap";  }
202                         }
203                         m->splitAtDash(calc, Estimators);
204                         
205                         ValidCalculators validCalculator;
206                         if (countends) {
207                                 for (int i=0; i<Estimators.size(); i++) {
208                                         if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
209                                                 if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
210                                                 else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
211                                                 else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
212                                         }
213                                 }
214                         }else {
215                                 for (int i=0; i<Estimators.size(); i++) {
216                                         if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
217                                                 if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
218                                                 else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
219                                                 else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
220                                         }
221                                 }
222                         }
223                 }
224                 
225         }
226         catch(exception& e) {
227                 m->errorOut(e, "PairwiseSeqsCommand", "PairwiseSeqsCommand");
228                 exit(1);
229         }
230 }
231 //**********************************************************************************************************************
232 PairwiseSeqsCommand::~PairwiseSeqsCommand(){}
233 //**********************************************************************************************************************
234
235 void PairwiseSeqsCommand::help(){
236         try {
237                 m->mothurOut("The pairwise.seqs command reads a fasta file and creates distance matrix.\n");
238                 m->mothurOut("The pairwise.seqs command parameters are fasta, align, match, mismatch, gapopen, gapextend, calc, output, cutoff and processors.\n");
239                 m->mothurOut("The fasta parameter is required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n");
240                 m->mothurOut("The align parameter allows you to specify the alignment method to use.  Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n");
241                 m->mothurOut("The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n");
242                 m->mothurOut("The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n");
243                 m->mothurOut("The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n");
244                 m->mothurOut("The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n");
245                 m->mothurOut("The calc parameter allows you to specify the method of calculating the distances.  Your options are: nogaps, onegap or eachgap. The default is onegap.\n");
246                 m->mothurOut("The countends parameter allows you to specify whether to include terminal gaps in distance.  Your options are: T or F. The default is T.\n");
247                 m->mothurOut("The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0.\n");
248                 m->mothurOut("The output parameter allows you to specify format of your distance matrix. Options are column, lt, and square. The default is column.\n");
249                 m->mothurOut("The compress parameter allows you to indicate that you want the resulting distance file compressed.  The default is false.\n");
250                 m->mothurOut("The pairwise.seqs command should be in the following format: \n");
251                 m->mothurOut("pairwise.seqs(fasta=yourfastaFile, align=yourAlignmentMethod) \n");
252                 m->mothurOut("Example pairwise.seqs(fasta=candidate.fasta, align=blast)\n");
253                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
254         }
255         catch(exception& e) {
256                 m->errorOut(e, "PairwiseSeqsCommand", "help");
257                 exit(1);
258         }
259 }
260
261
262 //**********************************************************************************************************************
263
264 int PairwiseSeqsCommand::execute(){
265         try {
266                 if (abort == true) {    return 0;       }
267                 
268                 int longestBase = 2000; //will need to update this in driver if we find sequences with more bases.  hardcoded so we don't have the pre-read user fasta file.
269                 
270                 if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
271                 else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
272                 else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
273                 else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
274                 else {
275                         m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
276                         m->mothurOutEndLine();
277                         alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
278                 }
279                 
280                 cutoff += 0.005;
281                 
282                 for (int s = 0; s < fastaFileNames.size(); s++) {
283                         if (m->control_pressed) { outputTypes.clear(); return 0; }
284                         
285                         m->mothurOut("Processing sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
286                         
287                         if (outputDir == "") {  outputDir += m->hasPath(fastaFileNames[s]); }
288                         
289                         ifstream inFASTA;
290                         m->openInputFile(fastaFileNames[s], inFASTA);
291                         alignDB = SequenceDB(inFASTA); 
292                         inFASTA.close();
293                         
294                         int numSeqs = alignDB.getNumSeqs();
295                         int startTime = time(NULL);
296                         string outputFile = "";
297                                 
298                         if (output == "lt") { //does the user want lower triangle phylip formatted file 
299                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "phylip.dist";
300                                 remove(outputFile.c_str()); outputTypes["phylip"].push_back(outputFile);
301                         }else if (output == "column") { //user wants column format
302                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "dist";
303                                 outputTypes["column"].push_back(outputFile);
304                                 remove(outputFile.c_str());
305                         }else { //assume square
306                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "square.dist";
307                                 remove(outputFile.c_str());
308                                 outputTypes["phylip"].push_back(outputFile);
309                         }
310                         
311                         #ifdef USE_MPI
312                 
313                         int pid, start, end; 
314                         int tag = 2001;
315                                         
316                         MPI_Status status; 
317                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
318                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
319                         
320                         //each process gets where it should start and stop in the file
321                         if (output != "square") {
322                                 start = int (sqrt(float(pid)/float(processors)) * numSeqs);
323                                 end = int (sqrt(float(pid+1)/float(processors)) * numSeqs);
324                         }else{
325                                 start = int ((float(pid)/float(processors)) * numSeqs);
326                                 end = int ((float(pid+1)/float(processors)) * numSeqs);
327                         }
328                         
329                         if (output == "column") {
330                                 MPI_File outMPI;
331                                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
332
333                         char filename[1024];
334                                 strcpy(filename, outputFile.c_str());
335                                 
336                                 MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI);
337
338                                 if (pid == 0) { //you are the root process 
339                                 
340                                         //do your part
341                                         string outputMyPart;
342                                         
343                                         driverMPI(start, end, outMPI, cutoff); 
344                                         
345                                         if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI);  remove(outputFile.c_str()); delete distCalculator;  return 0; }
346                                 
347                                         //wait on chidren
348                                         for(int i = 1; i < processors; i++) { 
349                                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   remove(outputFile.c_str()); delete distCalculator;  return 0; }
350                                                 
351                                                 char buf[5];
352                                                 MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
353                                         }
354                                 }else { //you are a child process
355                                         //do your part
356                                         driverMPI(start, end, outMPI, cutoff); 
357                                         
358                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  remove(outputFile.c_str()); delete distCalculator;  return 0; }
359                                 
360                                         char buf[5];
361                                         strcpy(buf, "done"); 
362                                         //tell parent you are done.
363                                         MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
364                                 }
365                                 
366                                 MPI_File_close(&outMPI);
367                                 
368                         }else { //lower triangle format
369                                 if (pid == 0) { //you are the root process 
370                                 
371                                         //do your part
372                                         string outputMyPart;
373                                         unsigned long int mySize;
374                                         
375                                         if (output != "square"){ driverMPI(start, end, outputFile, mySize); }
376                                         else { driverMPI(start, end, outputFile, mySize, output); }
377                 
378                                         if (m->control_pressed) {  outputTypes.clear();   remove(outputFile.c_str()); delete distCalculator;  return 0; }
379                                         
380                                         int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; //
381                                         MPI_File outMPI;
382                                         MPI_File inMPI;
383
384                                         char filename[1024];
385                                         strcpy(filename, outputFile.c_str());
386
387                                         MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
388
389                                         //wait on chidren
390                                         for(int b = 1; b < processors; b++) { 
391                                                 unsigned long int fileSize;
392                                                 
393                                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  remove(outputFile.c_str());  delete distCalculator;  return 0; }
394                                                 
395                                                 MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); 
396                                                 
397                                                 string outTemp = outputFile + toString(b) + ".temp";
398
399                                                 char* buf = new char[outTemp.length()];
400                                                 memcpy(buf, outTemp.c_str(), outTemp.length());
401                                                 
402                                                 MPI_File_open(MPI_COMM_SELF, buf, MPI_MODE_DELETE_ON_CLOSE|MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);
403                                                 delete buf;
404
405                                                 int count = 0;
406                                                 while (count < fileSize) { 
407                                                         char buf2[1];
408                                                         MPI_File_read(inMPI, buf2, 1, MPI_CHAR, &status);
409                                                         MPI_File_write(outMPI, buf2, 1, MPI_CHAR, &status);
410                                                         count += 1;
411                                                 }
412                                                 
413                                                 MPI_File_close(&inMPI); //deleted on close
414                                         }
415                                         
416                                         MPI_File_close(&outMPI);
417                                 }else { //you are a child process
418                                         //do your part
419                                         unsigned long int size;
420                                         if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); }
421                                         else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); }
422                                         
423                                         if (m->control_pressed) { delete distCalculator;  return 0; }
424                                 
425                                         //tell parent you are done.
426                                         MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD);
427                                 }
428                         }
429                         MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
430         #else           
431                                         
432                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
433                         //if you don't need to fork anything
434                         if(processors == 1){
435                                 if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
436                                 else { driver(0, numSeqs, outputFile, "square");  }
437                         }else{ //you have multiple processors
438                                 
439                                 for (int i = 0; i < processors; i++) {
440                                         lines.push_back(new linePair());
441                                         if (output != "square") {
442                                                 lines[i]->start = int (sqrt(float(i)/float(processors)) * numSeqs);
443                                                 lines[i]->end = int (sqrt(float(i+1)/float(processors)) * numSeqs);
444                                         }else{
445                                                 lines[i]->start = int ((float(i)/float(processors)) * numSeqs);
446                                                 lines[i]->end = int ((float(i+1)/float(processors)) * numSeqs);
447                                         }
448                                 }
449
450                                 createProcesses(outputFile); 
451                         
452                                 map<int, int>::iterator it = processIDS.begin();
453                                 rename((outputFile + toString(it->second) + ".temp").c_str(), outputFile.c_str());
454                                 it++;
455                                 
456                                 //append and remove temp files
457                                 for (; it != processIDS.end(); it++) {
458                                         m->appendFiles((outputFile + toString(it->second) + ".temp"), outputFile);
459                                         remove((outputFile + toString(it->second) + ".temp").c_str());
460                                 }
461                         }
462                 #else
463                         //ifstream inFASTA;
464                         if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
465                         else { driver(0, numSeqs, outputFile, "square");  }
466                 #endif
467                 
468         #endif
469                         if (m->control_pressed) { outputTypes.clear();  delete distCalculator; remove(outputFile.c_str()); return 0; }
470                         
471                         #ifdef USE_MPI
472                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
473                                                 
474                                 if (pid == 0) { //only one process should output to screen
475                         #endif
476                         
477                         ifstream fileHandle;
478                         fileHandle.open(outputFile.c_str());
479                         if(fileHandle) {
480                                 m->gobble(fileHandle);
481                                 if (fileHandle.eof()) { m->mothurOut(outputFile + " is blank. This can result if there are no distances below your cutoff.");  m->mothurOutEndLine(); }
482                         }
483                         
484                         if (compress) {
485                                 m->mothurOut("Compressing..."); m->mothurOutEndLine();
486                                 m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)"); m->mothurOutEndLine();
487                                 system(("gzip -v " + outputFile).c_str());
488                                 outputNames.push_back(outputFile + ".gz");
489                         }else { outputNames.push_back(outputFile); }
490                         
491                         #ifdef USE_MPI
492                                 }
493                         #endif
494                         
495                         m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
496                         
497                         if (m->control_pressed) { outputTypes.clear();  delete distCalculator; remove(outputFile.c_str()); return 0; }
498                 }
499                         
500                 delete distCalculator;
501                 
502                 m->mothurOutEndLine();
503                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
504                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
505                 m->mothurOutEndLine();
506                 
507
508                 return 0;
509         }
510         catch(exception& e) {
511                 m->errorOut(e, "PairwiseSeqsCommand", "execute");
512                 exit(1);
513         }
514 }
515
516 /**************************************************************************************************/
517 void PairwiseSeqsCommand::createProcesses(string filename) {
518         try {
519 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
520                 int process = 0;
521                 processIDS.clear();
522                 
523                 //loop through and create all the processes you want
524                 while (process != processors) {
525                         int pid = fork();
526                         
527                         if (pid > 0) {
528                                 processIDS[lines[process]->end] = pid;  //create map from line number to pid so you can append files in correct order later
529                                 process++;
530                         }else if (pid == 0){
531                                 if (output != "square") {  driver(lines[process]->start, lines[process]->end, filename + toString(getpid()) + ".temp", cutoff); }
532                                 else { driver(lines[process]->start, lines[process]->end, filename + toString(getpid()) + ".temp", "square"); }
533                                 exit(0);
534                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
535                 }
536         
537                 //force parent to wait until all the processes are done
538                 for (map<int, int>::iterator it = processIDS.begin(); it != processIDS.end(); it++) { 
539                         int temp = it->second;
540                         wait(&temp);
541                 }
542 #endif
543         }
544         catch(exception& e) {
545                 m->errorOut(e, "PairwiseSeqsCommand", "createProcesses");
546                 exit(1);
547         }
548 }
549
550 /**************************************************************************************************/
551 /////// need to fix to work with calcs and sequencedb
552 int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, float cutoff){
553         try {
554
555                 int startTime = time(NULL);
556                 
557                 //column file
558                 ofstream outFile(dFileName.c_str(), ios::trunc);
559                 outFile.setf(ios::fixed, ios::showpoint);
560                 outFile << setprecision(4);
561                 
562                 if((output == "lt") && startLine == 0){ outFile << alignDB.getNumSeqs() << endl;        }
563                 
564                 for(int i=startLine;i<endLine;i++){
565                         if(output == "lt")      {       
566                                 string name = alignDB.get(i).getName();
567                                 if (name.length() < 10) { //pad with spaces to make compatible
568                                         while (name.length() < 10) {  name += " ";  }
569                                 }
570                                 outFile << name << '\t';        
571                         }
572                         
573                         for(int j=0;j<i;j++){
574                                 
575                                 if (m->control_pressed) { outFile.close(); return 0;  }
576                                 
577                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
578                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
579                                 }
580                                 
581                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
582                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
583                                 }
584                                 
585                                 Sequence* seqI = new Sequence(alignDB.get(i).getName(), alignDB.get(i).getAligned());
586                                 Sequence* seqJ = new Sequence(alignDB.get(j).getName(), alignDB.get(j).getAligned());
587                                 
588                                 Nast(alignment, seqI, seqJ);
589                                 
590                                 distCalculator->calcDist(*seqI, *seqJ);
591                                 double dist = distCalculator->getDist();
592                                 
593                                 delete seqI; delete seqJ;
594                                 
595                                 if(dist <= cutoff){
596                                         if (output == "column") { outFile << alignDB.get(i).getName() << ' ' << alignDB.get(j).getName() << ' ' << dist << endl; }
597                                 }
598                                 if (output == "lt") {  outFile << dist << '\t'; }
599                         }
600                         
601                         if (output == "lt") { outFile << endl; }
602                         
603                         if(i % 100 == 0){
604                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
605                         }
606                         
607                 }
608                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
609                 
610                 outFile.close();
611                 
612                 return 1;
613         }
614         catch(exception& e) {
615                 m->errorOut(e, "PairwiseSeqsCommand", "driver");
616                 exit(1);
617         }
618 }
619 /**************************************************************************************************/
620 /////// need to fix to work with calcs and sequencedb
621 int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, string square){
622         try {
623
624                 int startTime = time(NULL);
625                 
626                 //column file
627                 ofstream outFile(dFileName.c_str(), ios::trunc);
628                 outFile.setf(ios::fixed, ios::showpoint);
629                 outFile << setprecision(4);
630                 
631                 if(startLine == 0){     outFile << alignDB.getNumSeqs() << endl;        }
632                 
633                 for(int i=startLine;i<endLine;i++){
634                                 
635                         string name = alignDB.get(i).getName();
636                         //pad with spaces to make compatible
637                         if (name.length() < 10) { while (name.length() < 10) {  name += " ";  } }
638                                 
639                         outFile << name << '\t';        
640                         
641                         for(int j=0;j<alignDB.getNumSeqs();j++){
642                                 
643                                 if (m->control_pressed) { outFile.close(); return 0;  }
644                                 
645                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
646                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
647                                 }
648                                 
649                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
650                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
651                                 }
652                                 
653                                 Sequence* seqI = new Sequence(alignDB.get(i).getName(), alignDB.get(i).getAligned());
654                                 Sequence* seqJ = new Sequence(alignDB.get(j).getName(), alignDB.get(j).getAligned());
655                                 
656                                 Nast(alignment, seqI, seqJ);
657                                 
658                                 distCalculator->calcDist(*seqI, *seqJ);
659                                 double dist = distCalculator->getDist();
660                                 
661                                 delete seqI; delete seqJ;
662                                                                 
663                                 outFile << dist << '\t'; 
664                         }
665                         
666                         outFile << endl; 
667                         
668                         if(i % 100 == 0){
669                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
670                         }
671                         
672                 }
673                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
674                 
675                 outFile.close();
676                 
677                 return 1;
678         }
679         catch(exception& e) {
680                 m->errorOut(e, "PairwiseSeqsCommand", "driver");
681                 exit(1);
682         }
683 }
684 #ifdef USE_MPI
685 /**************************************************************************************************/
686 /////// need to fix to work with calcs and sequencedb
687 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, float cutoff){
688         try {
689                 MPI_Status status;
690                 int startTime = time(NULL);
691                 
692                 string outputString = "";
693                 
694                 for(int i=startLine;i<endLine;i++){
695         
696                         for(int j=0;j<i;j++){
697                                 
698                                 if (m->control_pressed) {  return 0;  }
699                                 
700                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
701                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
702                                 }
703                                 
704                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
705                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
706                                 }
707                                 
708                                 Sequence* seqI = new Sequence(alignDB.get(i).getName(), alignDB.get(i).getAligned());
709                                 Sequence* seqJ = new Sequence(alignDB.get(j).getName(), alignDB.get(j).getAligned());
710                                 
711                                 Nast(alignment, seqI, seqJ);
712                                 
713                                 distCalculator->calcDist(*seqI, *seqJ);
714                                 double dist = distCalculator->getDist();
715                                 
716                                 delete seqI; delete seqJ;
717                                 
718                                 if(dist <= cutoff){
719                                          outputString += (alignDB.get(i).getName() + ' ' + alignDB.get(j).getName() + ' ' + toString(dist) + '\n'); 
720                                 }
721                         }
722                         
723                         if(i % 100 == 0){
724                                 //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
725                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
726                         }
727                         
728                          
729                         //send results to parent
730                         int length = outputString.length();
731
732                         char* buf = new char[length];
733                         memcpy(buf, outputString.c_str(), length);
734                         
735                         MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
736                         outputString = "";
737                         delete buf;
738                         
739                 }
740                 
741                 return 1;
742         }
743         catch(exception& e) {
744                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
745                 exit(1);
746         }
747 }
748 /**************************************************************************************************/
749 /////// need to fix to work with calcs and sequencedb
750 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size){
751         try {
752                 MPI_Status status;
753                 
754                 MPI_File outMPI;
755                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
756
757                 char filename[1024];
758                 strcpy(filename, file.c_str());
759
760                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
761
762                 int startTime = time(NULL);
763                 
764                 string outputString = "";
765                 size = 0;
766                 
767                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
768                 
769                 for(int i=startLine;i<endLine;i++){
770                                 
771                         string name = alignDB.get(i).getName();
772                         if (name.length() < 10) { //pad with spaces to make compatible
773                                 while (name.length() < 10) {  name += " ";  }
774                         }
775                         outputString += name + "\t";    
776                         
777                         for(int j=0;j<i;j++){
778                                 
779                                 if (m->control_pressed) {  return 0;  }
780                                 
781                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
782                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
783                                 }
784                                 
785                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
786                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
787                                 }
788                                 
789                                 Sequence* seqI = new Sequence(alignDB.get(i).getName(), alignDB.get(i).getAligned());
790                                 Sequence* seqJ = new Sequence(alignDB.get(j).getName(), alignDB.get(j).getAligned());
791                                 
792                                 Nast(alignment, seqI, seqJ);
793                                 
794                                 distCalculator->calcDist(*seqI, *seqJ);
795                                 double dist = distCalculator->getDist();
796                                 
797                                 delete seqI; delete seqJ;
798                                 
799                                 outputString += toString(dist) + "\t"; 
800                         }
801                         
802                         outputString += "\n"; 
803                         
804                         //send results to parent
805                         int length = outputString.length();
806                         char* buf = new char[length];
807                         memcpy(buf, outputString.c_str(), length);
808                         
809                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
810                         size += outputString.length();
811                         outputString = "";
812                         delete buf;
813                 }
814                 
815                 MPI_File_close(&outMPI);
816                 
817                 return 1;
818         }
819         catch(exception& e) {
820                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
821                 exit(1);
822         }
823 }
824 /**************************************************************************************************/
825 /////// need to fix to work with calcs and sequencedb
826 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size, string square){
827         try {
828                 MPI_Status status;
829                 
830                 MPI_File outMPI;
831                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
832
833                 char filename[1024];
834                 strcpy(filename, file.c_str());
835
836                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
837                 
838                 int startTime = time(NULL);
839                 
840                 string outputString = "";
841                 size = 0;
842                 
843                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
844                 
845                 for(int i=startLine;i<endLine;i++){
846                                 
847                         string name = alignDB.get(i).getName();
848                         if (name.length() < 10) { //pad with spaces to make compatible
849                                 while (name.length() < 10) {  name += " ";  }
850                         }
851                         outputString += name + "\t";    
852                         
853                         for(int j=0;j<alignDB.getNumSeqs();j++){
854                                 
855                                 if (m->control_pressed) {  return 0;  }
856                                 
857                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
858                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
859                                 }
860                                 
861                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
862                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
863                                 }
864                                 
865                                 Sequence* seqI = new Sequence(alignDB.get(i).getName(), alignDB.get(i).getAligned());
866                                 Sequence* seqJ = new Sequence(alignDB.get(j).getName(), alignDB.get(j).getAligned());
867                                 
868                                 Nast(alignment, seqI, seqJ);
869                                 
870                                 distCalculator->calcDist(*seqI, *seqJ);
871                                 double dist = distCalculator->getDist();
872                                 
873                                 delete seqI; delete seqJ;
874                                 
875                                 outputString += toString(dist) + "\t"; 
876                         }
877                         
878                         outputString += "\n"; 
879
880                         //send results to parent
881                         int length = outputString.length();
882                         char* buf = new char[length];
883                         memcpy(buf, outputString.c_str(), length);
884                         
885                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
886                         size += outputString.length();
887                         outputString = "";
888                         delete buf;
889                 }
890                 
891                 MPI_File_close(&outMPI);
892                 
893                 return 1;
894         }
895         catch(exception& e) {
896                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
897                 exit(1);
898         }
899 }
900 #endif
901 /**************************************************************************************************/
902