]> git.donarmstrong.com Git - mothur.git/blob - pairwiseseqscommand.cpp
added getCommandInfoCommand for gui
[mothur.git] / pairwiseseqscommand.cpp
1 /*
2  *  pairwiseseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/20/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "pairwiseseqscommand.h"
11 #include "sequence.hpp"
12
13 #include "gotohoverlap.hpp"
14 #include "needlemanoverlap.hpp"
15 #include "blastalign.hpp"
16 #include "noalign.hpp"
17
18 #include "ignoregaps.h"
19 #include "eachgapdist.h"
20 #include "eachgapignore.h"
21 #include "onegapdist.h"
22 #include "onegapignore.h"
23
24
25 //**********************************************************************************************************************
26 vector<string> PairwiseSeqsCommand::setParameters(){    
27         try {
28                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
29                 CommandParameter palign("align", "Multiple", "needleman-gotoh-blast-noalign", "needleman", "", "", "",false,false); parameters.push_back(palign);
30                 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch);
31                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
32                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
33                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
34                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
35                 CommandParameter poutput("output", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(poutput);
36                 CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "",false,false); parameters.push_back(pcalc);
37                 CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pcountends);
38                 CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcompress);
39                 CommandParameter pcutoff("cutoff", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pcutoff);
40                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
41                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
42                 
43                 vector<string> myArray;
44                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "PairwiseSeqsCommand", "setParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 string PairwiseSeqsCommand::getHelpString(){    
54         try {
55                 string helpString = "";
56                 helpString += "The pairwise.seqs command reads a fasta file and creates distance matrix.\n";
57                 helpString += "The pairwise.seqs command parameters are fasta, align, match, mismatch, gapopen, gapextend, calc, output, cutoff and processors.\n";
58                 helpString += "The fasta parameter is required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
59                 helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n";
60                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
61                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
62                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
63                 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
64                 helpString += "The calc parameter allows you to specify the method of calculating the distances.  Your options are: nogaps, onegap or eachgap. The default is onegap.\n";
65                 helpString += "The countends parameter allows you to specify whether to include terminal gaps in distance.  Your options are: T or F. The default is T.\n";
66                 helpString += "The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0.\n";
67                 helpString += "The output parameter allows you to specify format of your distance matrix. Options are column, lt, and square. The default is column.\n";
68                 helpString += "The compress parameter allows you to indicate that you want the resulting distance file compressed.  The default is false.\n";
69                 helpString += "The pairwise.seqs command should be in the following format: \n";
70                 helpString += "pairwise.seqs(fasta=yourfastaFile, align=yourAlignmentMethod) \n";
71                 helpString += "Example pairwise.seqs(fasta=candidate.fasta, align=blast)\n";
72                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
73                 return helpString;
74         }
75         catch(exception& e) {
76                 m->errorOut(e, "PairwiseSeqsCommand", "getHelpString");
77                 exit(1);
78         }
79 }
80
81 //**********************************************************************************************************************
82 PairwiseSeqsCommand::PairwiseSeqsCommand(){     
83         try {
84                 abort = true; calledHelp = true; 
85                 setParameters();
86                 vector<string> tempOutNames;
87                 outputTypes["phylip"] = tempOutNames;
88                 outputTypes["column"] = tempOutNames;
89         }
90         catch(exception& e) {
91                 m->errorOut(e, "PairwiseSeqsCommand", "PairwiseSeqsCommand");
92                 exit(1);
93         }
94 }
95 //**********************************************************************************************************************
96 PairwiseSeqsCommand::PairwiseSeqsCommand(string option)  {
97         try {
98                 abort = false; calledHelp = false;   
99         
100                 //allow user to run help
101                 if(option == "help") { help(); abort = true; calledHelp = true; }
102                 
103                 else {
104                         vector<string> myArray = setParameters();
105                         
106                         OptionParser parser(option);
107                         map<string, string> parameters = parser.getParameters(); 
108                         
109                         ValidParameters validParameter("pairwise.seqs");
110                         map<string, string>::iterator it;
111                         
112                         //check to make sure all parameters are valid for command
113                         for (it = parameters.begin(); it != parameters.end(); it++) { 
114                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
115                         }
116                         
117                         //initialize outputTypes
118                         vector<string> tempOutNames;
119                         outputTypes["phylip"] = tempOutNames;
120                         outputTypes["column"] = tempOutNames;
121                         
122                         //if the user changes the output directory command factory will send this info to us in the output parameter 
123                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
124                         
125
126                         //if the user changes the input directory command factory will send this info to us in the output parameter 
127                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
128                         
129                         if (inputDir == "not found"){   inputDir = "";          }
130
131                         fastaFileName = validParameter.validFile(parameters, "fasta", false);
132                         if (fastaFileName == "not found") {                             
133                                 //if there is a current fasta file, use it
134                                 string filename = m->getFastaFile(); 
135                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
136                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
137                         }
138                         else { 
139                                 m->splitAtDash(fastaFileName, fastaFileNames);
140                                 
141                                 //go through files and make sure they are good, if not, then disregard them
142                                 for (int i = 0; i < fastaFileNames.size(); i++) {
143                                         
144                                         if (inputDir != "") {
145                                                 string path = m->hasPath(fastaFileNames[i]);
146                                                 //if the user has not given a path then, add inputdir. else leave path alone.
147                                                 if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
148                                         }
149         
150                                         int ableToOpen;
151                                         ifstream in;
152
153                                         ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
154                                 
155                                         //if you can't open it, try default location
156                                         if (ableToOpen == 1) {
157                                                 if (m->getDefaultPath() != "") { //default path is set
158                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
159                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
160                                                         ifstream in2;
161                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
162                                                         in2.close();
163                                                         fastaFileNames[i] = tryPath;
164                                                 }
165                                         }
166                                         
167                                         //if you can't open it, try output location
168                                         if (ableToOpen == 1) {
169                                                 if (m->getOutputDir() != "") { //default path is set
170                                                         string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
171                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
172                                                         ifstream in2;
173                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
174                                                         in2.close();
175                                                         fastaFileNames[i] = tryPath;
176                                                 }
177                                         }
178                                         
179                                         in.close();                                     
180
181                                         if (ableToOpen == 1) { 
182                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
183                                                 //erase from file list
184                                                 fastaFileNames.erase(fastaFileNames.begin()+i);
185                                                 i--;
186                                         }
187                                 }
188                                 
189                                 //make sure there is at least one valid file left
190                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
191                         }
192                 
193                         //check for optional parameter and set defaults
194                         // ...at some point should added some additional type checking...
195                         string temp;
196                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
197                         convert(temp, match);  
198                         
199                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
200                         convert(temp, misMatch);  
201                         
202                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
203                         convert(temp, gapOpen);  
204                         
205                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
206                         convert(temp, gapExtend); 
207                         
208                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
209                         m->setProcessors(temp);
210                         convert(temp, processors);
211                         
212                         temp = validParameter.validFile(parameters, "cutoff", false);           if(temp == "not found"){        temp = "1.0"; }
213                         convert(temp, cutoff); 
214                         
215                         temp = validParameter.validFile(parameters, "countends", false);        if(temp == "not found"){        temp = "T";     }
216                         countends = m->isTrue(temp); 
217                         
218                         temp = validParameter.validFile(parameters, "compress", false);         if(temp == "not found"){  temp = "F"; }
219                         compress = m->isTrue(temp); 
220                         
221                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
222                         
223                         output = validParameter.validFile(parameters, "output", false);         if(output == "not found"){      output = "column"; }
224                         if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; }
225                         
226                         calc = validParameter.validFile(parameters, "calc", false);                     
227                         if (calc == "not found") { calc = "onegap";  }
228                         else { 
229                                  if (calc == "default")  {  calc = "onegap";  }
230                         }
231                         m->splitAtDash(calc, Estimators);
232                         
233                         ValidCalculators validCalculator;
234                         if (countends) {
235                                 for (int i=0; i<Estimators.size(); i++) {
236                                         if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
237                                                 if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
238                                                 else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
239                                                 else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
240                                         }
241                                 }
242                         }else {
243                                 for (int i=0; i<Estimators.size(); i++) {
244                                         if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
245                                                 if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
246                                                 else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
247                                                 else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
248                                         }
249                                 }
250                         }
251                 }
252                 
253         }
254         catch(exception& e) {
255                 m->errorOut(e, "PairwiseSeqsCommand", "PairwiseSeqsCommand");
256                 exit(1);
257         }
258 }
259 //**********************************************************************************************************************
260
261 int PairwiseSeqsCommand::execute(){
262         try {
263                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
264                 
265                 int longestBase = 2000; //will need to update this in driver if we find sequences with more bases.  hardcoded so we don't have the pre-read user fasta file.
266                 
267                 if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
268                 else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
269                 else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
270                 else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
271                 else {
272                         m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
273                         m->mothurOutEndLine();
274                         alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
275                 }
276                 
277                 cutoff += 0.005;
278                 
279                 for (int s = 0; s < fastaFileNames.size(); s++) {
280                         if (m->control_pressed) { outputTypes.clear(); return 0; }
281                         
282                         m->mothurOut("Processing sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
283                         
284                         if (outputDir == "") {  outputDir += m->hasPath(fastaFileNames[s]); }
285                         
286                         ifstream inFASTA;
287                         m->openInputFile(fastaFileNames[s], inFASTA);
288                         alignDB = SequenceDB(inFASTA); 
289                         inFASTA.close();
290                         
291                         int numSeqs = alignDB.getNumSeqs();
292                         int startTime = time(NULL);
293                         string outputFile = "";
294                                 
295                         if (output == "lt") { //does the user want lower triangle phylip formatted file 
296                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "phylip.dist";
297                                 remove(outputFile.c_str()); outputTypes["phylip"].push_back(outputFile);
298                         }else if (output == "column") { //user wants column format
299                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "dist";
300                                 outputTypes["column"].push_back(outputFile);
301                                 remove(outputFile.c_str());
302                         }else { //assume square
303                                 outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "square.dist";
304                                 remove(outputFile.c_str());
305                                 outputTypes["phylip"].push_back(outputFile);
306                         }
307                         
308                         #ifdef USE_MPI
309                 
310                         int pid, start, end; 
311                         int tag = 2001;
312                                         
313                         MPI_Status status; 
314                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
315                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
316                         
317                         //each process gets where it should start and stop in the file
318                         if (output != "square") {
319                                 start = int (sqrt(float(pid)/float(processors)) * numSeqs);
320                                 end = int (sqrt(float(pid+1)/float(processors)) * numSeqs);
321                         }else{
322                                 start = int ((float(pid)/float(processors)) * numSeqs);
323                                 end = int ((float(pid+1)/float(processors)) * numSeqs);
324                         }
325                         
326                         if (output == "column") {
327                                 MPI_File outMPI;
328                                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
329
330                         char filename[1024];
331                                 strcpy(filename, outputFile.c_str());
332                                 
333                                 MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI);
334
335                                 if (pid == 0) { //you are the root process 
336                                 
337                                         //do your part
338                                         string outputMyPart;
339                                         
340                                         driverMPI(start, end, outMPI, cutoff); 
341                                         
342                                         if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI);  remove(outputFile.c_str()); delete distCalculator;  return 0; }
343                                 
344                                         //wait on chidren
345                                         for(int i = 1; i < processors; i++) { 
346                                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   remove(outputFile.c_str()); delete distCalculator;  return 0; }
347                                                 
348                                                 char buf[5];
349                                                 MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
350                                         }
351                                 }else { //you are a child process
352                                         //do your part
353                                         driverMPI(start, end, outMPI, cutoff); 
354                                         
355                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  remove(outputFile.c_str()); delete distCalculator;  return 0; }
356                                 
357                                         char buf[5];
358                                         strcpy(buf, "done"); 
359                                         //tell parent you are done.
360                                         MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
361                                 }
362                                 
363                                 MPI_File_close(&outMPI);
364                                 
365                         }else { //lower triangle format
366                                 if (pid == 0) { //you are the root process 
367                                 
368                                         //do your part
369                                         string outputMyPart;
370                                         unsigned long int mySize;
371                                         
372                                         if (output != "square"){ driverMPI(start, end, outputFile, mySize); }
373                                         else { driverMPI(start, end, outputFile, mySize, output); }
374                 
375                                         if (m->control_pressed) {  outputTypes.clear();   remove(outputFile.c_str()); delete distCalculator;  return 0; }
376                                         
377                                         int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; //
378                                         MPI_File outMPI;
379                                         MPI_File inMPI;
380
381                                         char filename[1024];
382                                         strcpy(filename, outputFile.c_str());
383
384                                         MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
385
386                                         //wait on chidren
387                                         for(int b = 1; b < processors; b++) { 
388                                                 unsigned long int fileSize;
389                                                 
390                                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  remove(outputFile.c_str());  delete distCalculator;  return 0; }
391                                                 
392                                                 MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); 
393                                                 
394                                                 string outTemp = outputFile + toString(b) + ".temp";
395
396                                                 char* buf = new char[outTemp.length()];
397                                                 memcpy(buf, outTemp.c_str(), outTemp.length());
398                                                 
399                                                 MPI_File_open(MPI_COMM_SELF, buf, MPI_MODE_DELETE_ON_CLOSE|MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);
400                                                 delete buf;
401
402                                                 int count = 0;
403                                                 while (count < fileSize) { 
404                                                         char buf2[1];
405                                                         MPI_File_read(inMPI, buf2, 1, MPI_CHAR, &status);
406                                                         MPI_File_write(outMPI, buf2, 1, MPI_CHAR, &status);
407                                                         count += 1;
408                                                 }
409                                                 
410                                                 MPI_File_close(&inMPI); //deleted on close
411                                         }
412                                         
413                                         MPI_File_close(&outMPI);
414                                 }else { //you are a child process
415                                         //do your part
416                                         unsigned long int size;
417                                         if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); }
418                                         else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); }
419                                         
420                                         if (m->control_pressed) { delete distCalculator;  return 0; }
421                                 
422                                         //tell parent you are done.
423                                         MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD);
424                                 }
425                         }
426                         MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
427         #else           
428                                         
429                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
430                         //if you don't need to fork anything
431                         if(processors == 1){
432                                 if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
433                                 else { driver(0, numSeqs, outputFile, "square");  }
434                         }else{ //you have multiple processors
435                                 
436                                 for (int i = 0; i < processors; i++) {
437                                         distlinePair tempLine;
438                                         lines.push_back(tempLine);
439                                         if (output != "square") {
440                                                 lines[i].start = int (sqrt(float(i)/float(processors)) * numSeqs);
441                                                 lines[i].end = int (sqrt(float(i+1)/float(processors)) * numSeqs);
442                                         }else{
443                                                 lines[i].start = int ((float(i)/float(processors)) * numSeqs);
444                                                 lines[i].end = int ((float(i+1)/float(processors)) * numSeqs);
445                                         }
446                                 }
447                                 
448                                 createProcesses(outputFile); 
449                         }
450                 #else
451                         //ifstream inFASTA;
452                         if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
453                         else { driver(0, numSeqs, outputFile, "square");  }
454                 #endif
455                 
456         #endif
457                         if (m->control_pressed) { outputTypes.clear();  delete distCalculator; remove(outputFile.c_str()); return 0; }
458                         
459                         #ifdef USE_MPI
460                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
461                                                 
462                                 if (pid == 0) { //only one process should output to screen
463                         #endif
464                         
465                         ifstream fileHandle;
466                         fileHandle.open(outputFile.c_str());
467                         if(fileHandle) {
468                                 m->gobble(fileHandle);
469                                 if (fileHandle.eof()) { m->mothurOut(outputFile + " is blank. This can result if there are no distances below your cutoff.");  m->mothurOutEndLine(); }
470                         }
471                         
472                         if (compress) {
473                                 m->mothurOut("Compressing..."); m->mothurOutEndLine();
474                                 m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)"); m->mothurOutEndLine();
475                                 system(("gzip -v " + outputFile).c_str());
476                                 outputNames.push_back(outputFile + ".gz");
477                         }else { outputNames.push_back(outputFile); }
478                         
479                         #ifdef USE_MPI
480                                 }
481                         #endif
482                         
483                         m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
484                         
485                         if (m->control_pressed) { outputTypes.clear();  delete distCalculator; remove(outputFile.c_str()); return 0; }
486                 }
487                         
488                 delete distCalculator;
489                 
490                 //set phylip file as new current phylipfile
491                 string current = "";
492                 itTypes = outputTypes.find("phylip");
493                 if (itTypes != outputTypes.end()) {
494                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
495                 }
496                 
497                 //set column file as new current columnfile
498                 itTypes = outputTypes.find("column");
499                 if (itTypes != outputTypes.end()) {
500                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
501                 }
502                 
503                 m->mothurOutEndLine();
504                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
505                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
506                 m->mothurOutEndLine();
507                 
508
509                 return 0;
510         }
511         catch(exception& e) {
512                 m->errorOut(e, "PairwiseSeqsCommand", "execute");
513                 exit(1);
514         }
515 }
516
517 /**************************************************************************************************/
518 void PairwiseSeqsCommand::createProcesses(string filename) {
519         try {
520 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
521                 int process = 1;
522                 processIDS.clear();
523                 
524                 //loop through and create all the processes you want
525                 while (process != processors) {
526                         int pid = fork();
527                         
528                         if (pid > 0) {
529                                 processIDS.push_back(pid); 
530                                 process++;
531                         }else if (pid == 0){
532                                 if (output != "square") {  driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", cutoff); }
533                                 else { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", "square"); }
534                                 exit(0);
535                         }else { 
536                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
537                                 for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; kill (temp, SIGINT); }
538                                 exit(0);
539                         }
540                 }
541                 
542                 //parent do my part
543                 if (output != "square") {  driver(lines[0].start, lines[0].end, filename, cutoff); }
544                 else { driver(lines[0].start, lines[0].end, filename, "square"); }
545
546                 
547                 //force parent to wait until all the processes are done
548                 for (int i=0;i<processIDS.size();i++) { 
549                         int temp = processIDS[i];
550                         wait(&temp);
551                 }
552                 
553                 //append and remove temp files
554                 for (int i=0;i<processIDS.size();i++) { 
555                         m->appendFiles((filename + toString(processIDS[i]) + ".temp"), filename);
556                         remove((filename + toString(processIDS[i]) + ".temp").c_str());
557                 }
558 #endif
559         }
560         catch(exception& e) {
561                 m->errorOut(e, "PairwiseSeqsCommand", "createProcesses");
562                 exit(1);
563         }
564 }
565
566 /**************************************************************************************************/
567 /////// need to fix to work with calcs and sequencedb
568 int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, float cutoff){
569         try {
570
571                 int startTime = time(NULL);
572                 
573                 //column file
574                 ofstream outFile(dFileName.c_str(), ios::trunc);
575                 outFile.setf(ios::fixed, ios::showpoint);
576                 outFile << setprecision(4);
577                 
578                 if((output == "lt") && startLine == 0){ outFile << alignDB.getNumSeqs() << endl;        }
579                 
580                 for(int i=startLine;i<endLine;i++){
581                         if(output == "lt")      {       
582                                 string name = alignDB.get(i).getName();
583                                 if (name.length() < 10) { //pad with spaces to make compatible
584                                         while (name.length() < 10) {  name += " ";  }
585                                 }
586                                 outFile << name << '\t';        
587                         }
588                         
589                         for(int j=0;j<i;j++){
590                                 
591                                 if (m->control_pressed) { outFile.close(); return 0;  }
592                                 
593                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
594                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
595                                 }
596                                 
597                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
598                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
599                                 }
600                                 
601                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
602                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
603                                 
604                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
605                                 seqI.setAligned(alignment->getSeqAAln());
606                                 seqJ.setAligned(alignment->getSeqBAln());
607
608                                 
609                                 distCalculator->calcDist(seqI, seqJ);
610                                 double dist = distCalculator->getDist();
611                                 
612                                 if(dist <= cutoff){
613                                         if (output == "column") { outFile << alignDB.get(i).getName() << ' ' << alignDB.get(j).getName() << ' ' << dist << endl; }
614                                 }
615                                 if (output == "lt") {  outFile << dist << '\t'; }
616                         }
617                         
618                         if (output == "lt") { outFile << endl; }
619                         
620                         if(i % 100 == 0){
621                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
622                         }
623                         
624                 }
625                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
626                 
627                 outFile.close();
628                 
629                 return 1;
630         }
631         catch(exception& e) {
632                 m->errorOut(e, "PairwiseSeqsCommand", "driver");
633                 exit(1);
634         }
635 }
636 /**************************************************************************************************/
637 /////// need to fix to work with calcs and sequencedb
638 int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, string square){
639         try {
640
641                 int startTime = time(NULL);
642                 
643                 //column file
644                 ofstream outFile(dFileName.c_str(), ios::trunc);
645                 outFile.setf(ios::fixed, ios::showpoint);
646                 outFile << setprecision(4);
647                 
648                 if(startLine == 0){     outFile << alignDB.getNumSeqs() << endl;        }
649                 
650                 for(int i=startLine;i<endLine;i++){
651                                 
652                         string name = alignDB.get(i).getName();
653                         //pad with spaces to make compatible
654                         if (name.length() < 10) { while (name.length() < 10) {  name += " ";  } }
655                                 
656                         outFile << name << '\t';        
657                         
658                         for(int j=0;j<alignDB.getNumSeqs();j++){
659                                 
660                                 if (m->control_pressed) { outFile.close(); return 0;  }
661                                 
662                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
663                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
664                                 }
665                                 
666                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
667                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
668                                 }
669                                 
670                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
671                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
672                                 
673                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
674                                 seqI.setAligned(alignment->getSeqAAln());
675                                 seqJ.setAligned(alignment->getSeqBAln());
676                                 
677                                 distCalculator->calcDist(seqI, seqJ);
678                                 double dist = distCalculator->getDist();
679                                                                 
680                                 outFile << dist << '\t'; 
681                         }
682                         
683                         outFile << endl; 
684                         
685                         if(i % 100 == 0){
686                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
687                         }
688                         
689                 }
690                 m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
691                 
692                 outFile.close();
693                 
694                 return 1;
695         }
696         catch(exception& e) {
697                 m->errorOut(e, "PairwiseSeqsCommand", "driver");
698                 exit(1);
699         }
700 }
701 #ifdef USE_MPI
702 /**************************************************************************************************/
703 /////// need to fix to work with calcs and sequencedb
704 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, float cutoff){
705         try {
706                 MPI_Status status;
707                 int startTime = time(NULL);
708                 
709                 string outputString = "";
710                 
711                 for(int i=startLine;i<endLine;i++){
712         
713                         for(int j=0;j<i;j++){
714                                 
715                                 if (m->control_pressed) {  return 0;  }
716                                 
717                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
718                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
719                                 }
720                                 
721                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
722                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
723                                 }
724                                 
725                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
726                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
727                                 
728                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
729                                 seqI.setAligned(alignment->getSeqAAln());
730                                 seqJ.setAligned(alignment->getSeqBAln());
731                                 
732                                 distCalculator->calcDist(seqI, seqJ);
733                                 double dist = distCalculator->getDist();
734                                 
735                                 if(dist <= cutoff){
736                                          outputString += (alignDB.get(i).getName() + ' ' + alignDB.get(j).getName() + ' ' + toString(dist) + '\n'); 
737                                 }
738                         }
739                         
740                         if(i % 100 == 0){
741                                 //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
742                                 cout << i << '\t' << (time(NULL) - startTime) << endl;
743                         }
744                         
745                          
746                         //send results to parent
747                         int length = outputString.length();
748
749                         char* buf = new char[length];
750                         memcpy(buf, outputString.c_str(), length);
751                         
752                         MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
753                         outputString = "";
754                         delete buf;
755                         
756                 }
757                 
758                 return 1;
759         }
760         catch(exception& e) {
761                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
762                 exit(1);
763         }
764 }
765 /**************************************************************************************************/
766 /////// need to fix to work with calcs and sequencedb
767 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size){
768         try {
769                 MPI_Status status;
770                 
771                 MPI_File outMPI;
772                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
773
774                 char filename[1024];
775                 strcpy(filename, file.c_str());
776
777                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
778
779                 
780                 
781                 string outputString = "";
782                 size = 0;
783                 
784                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
785                 
786                 for(int i=startLine;i<endLine;i++){
787                                 
788                         string name = alignDB.get(i).getName();
789                         if (name.length() < 10) { //pad with spaces to make compatible
790                                 while (name.length() < 10) {  name += " ";  }
791                         }
792                         outputString += name + "\t";    
793                         
794                         for(int j=0;j<i;j++){
795                                 
796                                 if (m->control_pressed) {  return 0;  }
797                                 
798                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
799                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
800                                 }
801                                 
802                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
803                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
804                                 }
805                                 
806                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
807                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
808                                 
809                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
810                                 seqI.setAligned(alignment->getSeqAAln());
811                                 seqJ.setAligned(alignment->getSeqBAln());
812                                 
813                                 distCalculator->calcDist(seqI, seqJ);
814                                 double dist = distCalculator->getDist();
815                                 
816                                 outputString += toString(dist) + "\t"; 
817                         }
818                         
819                         outputString += "\n"; 
820                         
821                         //send results to parent
822                         int length = outputString.length();
823                         char* buf = new char[length];
824                         memcpy(buf, outputString.c_str(), length);
825                         
826                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
827                         size += outputString.length();
828                         outputString = "";
829                         delete buf;
830                 }
831                 
832                 MPI_File_close(&outMPI);
833                 
834                 return 1;
835         }
836         catch(exception& e) {
837                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
838                 exit(1);
839         }
840 }
841 /**************************************************************************************************/
842 /////// need to fix to work with calcs and sequencedb
843 int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size, string square){
844         try {
845                 MPI_Status status;
846                 
847                 MPI_File outMPI;
848                 int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
849
850                 char filename[1024];
851                 strcpy(filename, file.c_str());
852
853                 MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
854                 
855                 
856                 
857                 string outputString = "";
858                 size = 0;
859                 
860                 if(startLine == 0){     outputString += toString(alignDB.getNumSeqs()) + "\n";  }
861                 
862                 for(int i=startLine;i<endLine;i++){
863                                 
864                         string name = alignDB.get(i).getName();
865                         if (name.length() < 10) { //pad with spaces to make compatible
866                                 while (name.length() < 10) {  name += " ";  }
867                         }
868                         outputString += name + "\t";    
869                         
870                         for(int j=0;j<alignDB.getNumSeqs();j++){
871                                 
872                                 if (m->control_pressed) {  return 0;  }
873                                 
874                                 if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
875                                         alignment->resize(alignDB.get(i).getUnaligned().length()+1);
876                                 }
877                                 
878                                 if (alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
879                                         alignment->resize(alignDB.get(j).getUnaligned().length()+1);
880                                 }
881                                 
882                                 Sequence seqI(alignDB.get(i).getName(), alignDB.get(i).getAligned());
883                                 Sequence seqJ(alignDB.get(j).getName(), alignDB.get(j).getAligned());
884                                 
885                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
886                                 seqI.setAligned(alignment->getSeqAAln());
887                                 seqJ.setAligned(alignment->getSeqBAln());
888                                 
889                                 distCalculator->calcDist(seqI, seqJ);
890                                 double dist = distCalculator->getDist();
891                                 
892                                 outputString += toString(dist) + "\t"; 
893                         }
894                         
895                         outputString += "\n"; 
896
897                         //send results to parent
898                         int length = outputString.length();
899                         char* buf = new char[length];
900                         memcpy(buf, outputString.c_str(), length);
901                         
902                         MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
903                         size += outputString.length();
904                         outputString = "";
905                         delete buf;
906                 }
907                 
908                 MPI_File_close(&outMPI);
909                 
910                 return 1;
911         }
912         catch(exception& e) {
913                 m->errorOut(e, "PairwiseSeqsCommand", "driverMPI");
914                 exit(1);
915         }
916 }
917 #endif
918 /**************************************************************************************************/
919