]> git.donarmstrong.com Git - mothur.git/blob - clearcutcommand.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / clearcutcommand.cpp
1 /*
2  *  clearcutcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/11/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "clearcutcommand.h"
11 #ifdef __cplusplus
12 extern "C" {
13 #endif
14 #include "clearcut.h"
15 #ifdef __cplusplus
16 }
17 #endif
18
19 //**********************************************************************************************************************
20 vector<string> ClearcutCommand::setParameters(){        
21         try {
22                 CommandParameter pphylip("phylip", "InputTypes", "", "", "FastaPhylip", "FastaPhylip", "none","tree",false,false,true); parameters.push_back(pphylip);
23                 CommandParameter pfasta("fasta", "InputTypes", "", "", "FastaPhylip", "FastaPhylip", "none","tree",false,false,true); parameters.push_back(pfasta);
24                 CommandParameter pverbose("verbose", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pverbose);
25                 CommandParameter pquiet("quiet", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pquiet);
26                 CommandParameter pversion("version", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pversion);
27                 CommandParameter pseed("seed", "String", "", "", "*", "", "","",false,false); parameters.push_back(pseed);
28                 CommandParameter pnorandom("norandom", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pnorandom);
29                 CommandParameter pshuffle("shuffle", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pshuffle);
30                 CommandParameter pneighbor("neighbor", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pneighbor);
31                 CommandParameter pexpblen("expblen", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pexpblen);
32                 CommandParameter pexpdist("expdist", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pexpdist);
33                 CommandParameter pDNA("DNA", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pDNA);
34                 CommandParameter pprotein("protein", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pprotein);
35                 CommandParameter pjukes("jukes", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pjukes);
36                 CommandParameter pkimura("kimura", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkimura);
37                 CommandParameter pstdout("stdout", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pstdout);
38                 CommandParameter pntrees("ntrees", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pntrees);
39                 CommandParameter pmatrixout("matrixout", "String", "", "", "", "", "","",false,false); parameters.push_back(pmatrixout);
40                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
41                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
42                 
43                 vector<string> myArray;
44                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "ClearcutCommand", "setParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 string ClearcutCommand::getHelpString(){        
54         try {
55                 string helpString = "";
56                 helpString += "The clearcut command interfaces mothur with the clearcut program written by Initiative for Bioinformatics and Evolutionary Studies (IBEST) at the University of Idaho.\n";
57                 helpString += "For more information about clearcut refer to http://bioinformatics.hungry.com/clearcut/ \n";
58                 helpString += "The clearcut command parameters are phylip, fasta, version, verbose, quiet, seed, norandom, shuffle, neighbor, expblen, expdist, ntrees, matrixout, stdout, kimura, jukes, protein, DNA. \n";
59                 helpString += "The phylip parameter allows you to enter your phylip formatted distance matrix. \n";
60                 helpString += "The fasta parameter allows you to enter your aligned fasta file, if you enter a fastafile you specify if the sequences are DNA or protein using the DNA or protein parameters. \n";
61                 
62                 helpString += "The version parameter prints out the version of clearcut you are using, default=F. \n";
63                 helpString += "The verbose parameter prints out more output from clearcut, default=F. \n";
64                 helpString += "The quiet parameter turns on silent operation mode, default=F. \n";
65                 helpString += "The seed parameter allows you to explicitly set the PRNG seed to a specific value. \n";
66                 helpString += "The norandom parameter allows you to attempt joins deterministically, default=F. \n";
67                 helpString += "The shuffle parameter allows you to randomly shuffle the distance matrix, default=F. \n";
68                 helpString += "The neighbor parameter allows you to use traditional Neighbor-Joining algorithm, default=T. \n";
69                 
70                 helpString += "The DNA parameter allows you to indicate your fasta file contains DNA sequences, default=F. \n";
71                 helpString += "The protein parameter allows you to indicate your fasta file contains protein sequences, default=F. \n";
72                 
73                 helpString += "The stdout parameter outputs your tree to STDOUT, default=F. \n";
74                 helpString += "The matrixout parameter allows you to specify a filename to output a distance matrix to. \n";
75                 helpString += "The ntrees parameter allows you to specify the number of output trees, default=1. \n";
76                 helpString += "The expblen parameter allows you to use exponential notation for branch lengths, default=F. \n";
77                 helpString += "The expdist parameter allows you to use exponential notation for distance outputs, default=F. \n";
78                 
79                 helpString += "The clearcut command should be in the following format: \n";
80                 helpString += "clearcut(phylip=yourDistanceFile) \n";
81                 helpString += "Example: clearcut(phylip=abrecovery.phylip.dist) \n";    
82                 return helpString;
83         }
84         catch(exception& e) {
85                 m->errorOut(e, "ClearcutCommand", "getHelpString");
86                 exit(1);
87         }
88 }
89 //**********************************************************************************************************************
90 string ClearcutCommand::getOutputPattern(string type) {
91     try {
92         string pattern = "";
93         
94         if (type == "matrixout") {  pattern = "[filename],"; } 
95         else if (type == "tree") {  pattern = "[filename],tre"; } 
96         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
97         
98         return pattern;
99     }
100     catch(exception& e) {
101         m->errorOut(e, "ClearcutCommand", "getOutputPattern");
102         exit(1);
103     }
104 }
105 //**********************************************************************************************************************
106 ClearcutCommand::ClearcutCommand(){     
107         try {
108                 abort = true; calledHelp = true; 
109                 setParameters();
110                 vector<string> tempOutNames;
111                 outputTypes["tree"] = tempOutNames;
112                 outputTypes["matrixout"] = tempOutNames;
113         }
114         catch(exception& e) {
115                 m->errorOut(e, "ClearcutCommand", "ClearcutCommand");
116                 exit(1);
117         }
118 }
119 /**************************************************************************************/
120 ClearcutCommand::ClearcutCommand(string option)  {      
121         try {
122                 abort = false; calledHelp = false;   
123                 
124                 //allow user to run help
125                 if(option == "help") { help(); abort = true; calledHelp = true; }
126                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
127                 
128                 else {
129                         vector<string> myArray = setParameters();
130                         
131                         OptionParser parser(option);
132                         map<string,string> parameters = parser.getParameters();
133                         
134                         ValidParameters validParameter;
135                         map<string, string>::iterator it;
136                 
137                         //check to make sure all parameters are valid for command
138                         for (it = parameters.begin(); it != parameters.end(); it++) { 
139                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
140                         }
141                         
142                         //initialize outputTypes
143                         vector<string> tempOutNames;
144                         outputTypes["tree"] = tempOutNames;
145                         outputTypes["matrixout"] = tempOutNames;
146
147                         //if the user changes the input directory command factory will send this info to us in the output parameter 
148                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
149                         if (inputDir == "not found"){   inputDir = "";          }
150                         else {
151                                 string path;
152                                 it = parameters.find("fasta");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
158                                 }
159                                 
160                                 it = parameters.find("phylip");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
166                                 }
167                         }
168
169                         //check for required parameters
170                         fastafile = validParameter.validFile(parameters, "fasta", true);
171                         if (fastafile == "not open") { fastafile = ""; abort = true; }
172                         else if (fastafile == "not found") { fastafile = ""; }  
173                         else { inputFile = fastafile;  m->setFastaFile(fastafile); }
174                         
175                         phylipfile = validParameter.validFile(parameters, "phylip", true);
176                         if (phylipfile == "not open") { phylipfile = ""; abort = true; }
177                         else if (phylipfile == "not found") { phylipfile = ""; }
178                         else { inputFile = phylipfile;  m->setPhylipFile(phylipfile); }
179                                 
180                         if ((phylipfile == "") && (fastafile == "")) {  
181                                 //is there are current file available for either of these?
182                                 //give priority to phylip, then fasta
183                                 phylipfile = m->getPhylipFile(); 
184                                 if (phylipfile != "") {  inputFile = phylipfile; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
185                                 else { 
186                                         fastafile = m->getFastaFile(); 
187                                         if (fastafile != "") { inputFile = fastafile;  m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
188                                         else { 
189                                                 m->mothurOut("No valid current files. You must provide a phylip or fasta file before you can use the clearcut command."); m->mothurOutEndLine(); 
190                                                 abort = true;
191                                         }
192                                 }
193                         }
194                         if ((phylipfile != "") && (fastafile != "")) {  m->mothurOut("You must provide either a phylip formatted distance matrix or an aligned fasta file, not BOTH."); m->mothurOutEndLine(); abort=true; }
195
196                         
197                         //if the user changes the output directory command factory will send this info to us in the output parameter 
198                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputFile);      }
199                         
200                         string temp;
201                         temp = validParameter.validFile(parameters, "version", false);          if (temp == "not found"){       temp = "F";                     }
202                         version = m->isTrue(temp);
203                         
204                         temp = validParameter.validFile(parameters, "verbose", false);          if (temp == "not found"){       temp = "F";                     }
205                         verbose = m->isTrue(temp); 
206                         
207                         temp = validParameter.validFile(parameters, "quiet", false);            if (temp == "not found"){       temp = "F";                     }
208                         quiet = m->isTrue(temp); 
209                         
210                         seed = validParameter.validFile(parameters, "seed", false);                     if (seed == "not found"){       seed = "*";                     }
211                         
212                         temp = validParameter.validFile(parameters, "norandom", false);         if (temp == "not found"){       temp = "F";                     }
213                         norandom = m->isTrue(temp); 
214                         
215                         temp = validParameter.validFile(parameters, "shuffle", false);          if (temp == "not found"){       temp = "F";                     }
216                         shuffle = m->isTrue(temp); 
217                         
218                         temp = validParameter.validFile(parameters, "neighbor", false);         if (temp == "not found"){       temp = "T";                     }
219                         neighbor = m->isTrue(temp); 
220                         
221                         temp = validParameter.validFile(parameters, "DNA", false);                      if (temp == "not found"){       temp = "F";                     }
222                         DNA = m->isTrue(temp);
223                         
224                         temp = validParameter.validFile(parameters, "protein", false);          if (temp == "not found"){       temp = "F";                     }
225                         protein = m->isTrue(temp);
226                         
227                         temp = validParameter.validFile(parameters, "jukes", false);            if (temp == "not found"){       temp = "F";                     }
228                         jukes = m->isTrue(temp);
229                         
230                         temp = validParameter.validFile(parameters, "kimura", false);           if (temp == "not found"){       temp = "F";                     }
231                         kimura = m->isTrue(temp);
232                         
233                         temp = validParameter.validFile(parameters, "stdout", false);           if (temp == "not found"){       temp = "F";                     }
234                         stdoutWanted = m->isTrue(temp); 
235                         
236                         matrixout = validParameter.validFile(parameters, "matrixout", false);   if (matrixout == "not found"){  matrixout = "";         }
237                         
238                         ntrees = validParameter.validFile(parameters, "ntrees", false);         if (ntrees == "not found"){     ntrees = "1";           }
239                         
240                         temp = validParameter.validFile(parameters, "expblen", false);          if (temp == "not found"){       temp = "F";                     }
241                         expblen = m->isTrue(temp);
242                         
243                         temp = validParameter.validFile(parameters, "expdist", false);          if (temp == "not found"){       temp = "F";                     }
244                         expdist = m->isTrue(temp);
245                         
246                         if ((fastafile != "") && ((!DNA) && (!protein))) { m->mothurOut("You must specify the type of sequences you are using: DNA or protein"); m->mothurOutEndLine(); abort=true; }
247                 }
248
249         }
250         catch(exception& e) {
251                 m->errorOut(e, "ClearcutCommand", "ClearcutCommand");
252                 exit(1);
253         }
254 }
255 /**************************************************************************************/
256 int ClearcutCommand::execute() {        
257         try {
258                 
259                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
260                 
261                 //prepare filename
262         map<string, string> variables; 
263         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFile));
264                 string outputName = getOutputFileName("tree", variables);
265                 outputNames.push_back(outputName); outputTypes["tree"].push_back(outputName);
266                 
267                 vector<char*> cPara;
268                 
269                 char* tempClearcut = new char[9];  
270                 *tempClearcut = '\0'; strncat(tempClearcut, "clearcut", 8); 
271                 cPara.push_back(tempClearcut);
272                                 
273                 //you gave us a distance matrix
274                 if (phylipfile != "") {  char* temp = new char[11];  *temp = '\0'; strncat(temp, "--distance", 10);  cPara.push_back(temp);     }
275                 
276                 //you gave us a fastafile
277                 if (fastafile != "") { char* temp = new char[12];  *temp = '\0'; strncat(temp, "--alignment", 11);  cPara.push_back(temp);      }
278                 
279                 if (version)                    {  char* temp = new char[10];  *temp = '\0'; strncat(temp, "--version", 9);  cPara.push_back(temp);     }
280                 if (verbose)                    {  char* temp = new char[10];  *temp = '\0'; strncat(temp, "--verbose", 9);  cPara.push_back(temp);     }
281                 if (quiet)                              {  char* temp = new char[8];  *temp = '\0'; strncat(temp, "--quiet", 7);  cPara.push_back(temp);        }
282                 if (seed != "*")                {  
283                         string tempSeed = "--seed=" + seed;
284                         char* temp = new char[tempSeed.length()+1];
285                         *temp = '\0'; strncat(temp, tempSeed.c_str(), tempSeed.length());
286                         cPara.push_back(temp);
287                 }
288                 if (norandom)                   {  char* temp = new char[11];  *temp = '\0'; strncat(temp, "--norandom", 10);  cPara.push_back(temp);   }
289                 if (shuffle)                    {  char* temp = new char[10];  *temp = '\0'; strncat(temp, "--shuffle", 9);  cPara.push_back(temp);     }
290                 if (neighbor)                   {  char* temp = new char[11];  *temp = '\0'; strncat(temp, "--neighbor", 10);  cPara.push_back(temp);   }
291                 
292                 string tempIn = "--in=" + inputFile;  
293                 char* tempI = new char[tempIn.length()+1];
294                 *tempI = '\0'; strncat(tempI, tempIn.c_str(), tempIn.length());
295                 cPara.push_back(tempI);
296                 
297                 if (stdoutWanted)               {  char* temp = new char[9];  *temp = '\0'; strncat(temp, "--stdout", 8);  cPara.push_back(temp);       }
298                 else{  
299                         string tempOut = "--out=" + outputName;  
300                         char* temp = new char[tempOut.length()+1];
301                         *temp = '\0'; strncat(temp, tempOut.c_str(), tempOut.length());
302                         cPara.push_back(temp);
303                 }
304                         
305                 if (DNA)                                {  char* temp = new char[6];  *temp = '\0'; strncat(temp, "--DNA", 5);  cPara.push_back(temp);          }
306                 if (protein)                    {  char* temp = new char[10];  *temp = '\0'; strncat(temp, "--protein", 9);  cPara.push_back(temp);     }
307                 if (jukes)                              {  char* temp = new char[8];  *temp = '\0'; strncat(temp, "--jukes", 7);  cPara.push_back(temp);                }
308                 if (kimura)                             { char* temp = new char[9];  *temp = '\0'; strncat(temp, "--kimura", 8);  cPara.push_back(temp);                }
309                 if (matrixout != "")    {  
310                         string tempMatrix =  "--matrixout=" + outputDir + matrixout; 
311                         char* temp = new char[tempMatrix.length()+1];
312                         *temp = '\0'; strncat(temp, tempMatrix.c_str(), tempMatrix.length());
313                         cPara.push_back(temp);
314                         outputNames.push_back((outputDir + matrixout));
315                         outputTypes["matrixout"].push_back((outputDir + matrixout));
316                 }
317
318                 if (ntrees != "1")              {  
319                         string tempNtrees = "--ntrees=" + ntrees; 
320                         char* temp = new char[tempNtrees.length()+1];
321                         *temp = '\0'; strncat(temp, tempNtrees.c_str(), tempNtrees.length());
322                         cPara.push_back(temp);
323                 }
324
325                 if (expblen)                    { char* temp = new char[10];  *temp = '\0'; strncat(temp, "--expblen", 9);  cPara.push_back(temp);      }
326                 if (expdist)                    { char* temp = new char[10];  *temp = '\0'; strncat(temp, "--expdist", 9);  cPara.push_back(temp);      }
327                 
328                 char** clearcutParameters;
329                 clearcutParameters = new char*[cPara.size()];
330                 for (int i = 0; i < cPara.size(); i++) {  clearcutParameters[i] = cPara[i];  }
331                 int numArgs = cPara.size();
332                 
333                 clearcut_main(numArgs, clearcutParameters); 
334                 
335                 //free memory
336                 for(int i = 0; i < cPara.size(); i++)  {  delete[] cPara[i];  }
337                 delete[] clearcutParameters; 
338                 
339                 if (!stdoutWanted) {    
340                         
341                         //set first tree file as new current treefile
342                         string currentTree = "";
343                         itTypes = outputTypes.find("tree");
344                         if (itTypes != outputTypes.end()) {
345                                 if ((itTypes->second).size() != 0) { currentTree = (itTypes->second)[0]; m->setTreeFile(currentTree); }
346                         }
347                         
348                         m->mothurOutEndLine();
349                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
350                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
351                         m->mothurOutEndLine();
352                 }
353
354                 return 0;
355         }
356         catch(exception& e) {
357                 m->errorOut(e, "ClearcutCommand", "execute");
358                 exit(1);
359         }
360 }
361 /**************************************************************************************/
362
363
364
365