5 * Created by westcott on 5/11/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "clearcutcommand.h"
19 //**********************************************************************************************************************
// Declares the parameters the clearcut command accepts. Each statement below constructs a
// CommandParameter and appends it to `parameters`; the loop at the end copies every parameter's
// name into myArray.
// NOTE(review): the numbers fused to the start of each line skip (20->22, 44->48), so some lines
// of this function are not visible in this listing -- presumably the try/return/catch scaffolding
// (errorOut below uses `e`, which no visible line declares). Confirm against the complete file
// before editing.
20 vector<string> ClearcutCommand::setParameters(){
// Input files. NOTE(review): the first constructor argument is presumably the parameter name (the
// loop below reads parameters[i].name); the meaning of the remaining arguments is defined by
// CommandParameter, which is not visible here.
22 CommandParameter pphylip("phylip", "InputTypes", "", "", "FastaPhylip", "FastaPhylip", "none","tree",false,false,true); parameters.push_back(pphylip);
23 CommandParameter pfasta("fasta", "InputTypes", "", "", "FastaPhylip", "FastaPhylip", "none","tree",false,false,true); parameters.push_back(pfasta);
// Options that execute() turns into clearcut command-line arguments. The fourth argument
// ("F", "T", "1") matches the defaults quoted in getHelpString, so it is presumably the default
// value -- TODO confirm.
24 CommandParameter pverbose("verbose", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pverbose);
25 CommandParameter pquiet("quiet", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pquiet);
26 CommandParameter pversion("version", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pversion);
27 CommandParameter pseed("seed", "String", "", "", "*", "", "","",false,false); parameters.push_back(pseed);
28 CommandParameter pnorandom("norandom", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pnorandom);
29 CommandParameter pshuffle("shuffle", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pshuffle);
30 CommandParameter pneighbor("neighbor", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pneighbor);
31 CommandParameter pexpblen("expblen", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pexpblen);
32 CommandParameter pexpdist("expdist", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pexpdist);
33 CommandParameter pDNA("DNA", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pDNA);
34 CommandParameter pprotein("protein", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pprotein);
35 CommandParameter pjukes("jukes", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pjukes);
36 CommandParameter pkimura("kimura", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkimura);
37 CommandParameter pstdout("stdout", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pstdout);
38 CommandParameter pntrees("ntrees", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pntrees);
39 CommandParameter pmatrixout("matrixout", "String", "", "", "", "", "","",false,false); parameters.push_back(pmatrixout);
// Directory overrides; the string constructor reads both with validFile(parameters, ..., false).
40 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
41 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
// Collect just the names, in declaration order. The string constructor passes this list to
// isValidParameter to reject unknown parameter names.
43 vector<string> myArray;
44 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Reports the caught exception together with the class and method name.
48 m->errorOut(e, "ClearcutCommand", "setParameters");
52 //**********************************************************************************************************************
// Assembles the user-facing help text for the clearcut command by appending one sentence per line
// to helpString.
// NOTE(review): the fused listing numbers skip (e.g. 53->55, 81->85), so the return statement and
// the handler that declares `e` are not visible here -- presumably helpString is returned from
// inside a try/catch; confirm against the complete file.
53 string ClearcutCommand::getHelpString(){
55 string helpString = "";
// Attribution and pointer to the upstream clearcut documentation.
56 helpString += "The clearcut command interfaces mothur with the clearcut program written by Initiative for Bioinformatics and Evolutionary Studies (IBEST) at the University of Idaho.\n";
57 helpString += "For more information about clearcut refer to http://bioinformatics.hungry.com/clearcut/ \n";
// Parameter overview. NOTE(review): kimura and jukes are listed here, but none of the visible
// lines below describes them; the listing skips lines (61, 69, 72, 78), so verify against the
// complete file whether a description exists.
58 helpString += "The clearcut command parameters are phylip, fasta, version, verbose, quiet, seed, norandom, shuffle, neighbor, expblen, expdist, ntrees, matrixout, stdout, kimura, jukes, protein, DNA. \n";
// Input files.
59 helpString += "The phylip parameter allows you to enter your phylip formatted distance matrix. \n";
60 helpString += "The fasta parameter allows you to enter your aligned fasta file, if you enter a fastafile you specify if the sequences are DNA or protein using the DNA or protein parameters. \n";
// General options; the quoted defaults match the fallbacks applied in the string constructor.
62 helpString += "The version parameter prints out the version of clearcut you are using, default=F. \n";
63 helpString += "The verbose parameter prints out more output from clearcut, default=F. \n";
64 helpString += "The quiet parameter turns on silent operation mode, default=F. \n";
65 helpString += "The seed parameter allows you to explicitly set the PRNG seed to a specific value. \n";
66 helpString += "The norandom parameter allows you to attempt joins deterministically, default=F. \n";
67 helpString += "The shuffle parameter allows you to randomly shuffle the distance matrix, default=F. \n";
68 helpString += "The neighbor parameter allows you to use traditional Neighbor-Joining algorithm, default=T. \n";
// Sequence type for fasta input; the string constructor aborts when a fasta file is given with
// neither DNA nor protein set.
70 helpString += "The DNA parameter allows you to indicate your fasta file contains DNA sequences, default=F. \n";
71 helpString += "The protein parameter allows you to indicate your fasta file contains protein sequences, default=F. \n";
// Output options.
73 helpString += "The stdout parameter outputs your tree to STDOUT, default=F. \n";
74 helpString += "The matrixout parameter allows you to specify a filename to output a distance matrix to. \n";
75 helpString += "The ntrees parameter allows you to specify the number of output trees, default=1. \n";
76 helpString += "The expblen parameter allows you to use exponential notation for branch lengths, default=F. \n";
77 helpString += "The expdist parameter allows you to use exponential notation for distance outputs, default=F. \n";
// Usage line and example.
79 helpString += "The clearcut command should be in the following format: \n";
80 helpString += "clearcut(phylip=yourDistanceFile) \n";
81 helpString += "Example: clearcut(phylip=abrecovery.phylip.dist) \n";
// Reports the caught exception together with the class and method name.
85 m->errorOut(e, "ClearcutCommand", "getHelpString");
89 //**********************************************************************************************************************
// Selects the output filename pattern for the given output type:
//   "matrixout" -> "[filename],"
//   "tree"      -> "[filename],tre"
// Any other type logs an error and sets m->control_pressed to true.
// NOTE(review): the declaration of `pattern`, the opening of the try block and the return are not
// visible in this listing (the fused numbers skip 90->94 and 96->100); presumably `pattern` starts
// empty and is returned -- confirm against the complete file. The "[filename]" placeholder matches
// the key execute() fills in before calling getOutputFileName.
90 string ClearcutCommand::getOutputPattern(string type) {
94 if (type == "matrixout") { pattern = "[filename],"; }
95 else if (type == "tree") { pattern = "[filename],tre"; }
96 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
100 catch(exception& e) {
// Reports the caught exception together with the class and method name.
101 m->errorOut(e, "ClearcutCommand", "getOutputPattern");
105 //**********************************************************************************************************************
// Default constructor: builds a command object that is not meant to run. With abort and
// calledHelp both true, execute() returns 0 straight away.
// NOTE(review): the fused listing numbers skip (106->108, 112->114), so the opening of the try
// block and the closing braces are not visible here.
106 ClearcutCommand::ClearcutCommand(){
108 abort = true; calledHelp = true;
// Register the two output types this command can produce, each with an empty file list.
110 vector<string> tempOutNames;
111 outputTypes["tree"] = tempOutNames;
112 outputTypes["matrixout"] = tempOutNames;
114 catch(exception& e) {
// Reports the caught exception together with the class and method name.
115 m->errorOut(e, "ClearcutCommand", "ClearcutCommand");
119 /**************************************************************************************/
// Constructs the command from the user's option string: handles "help"/"citation", validates the
// parameter names, resolves the input file (phylip or fasta), and reads every option into the
// corresponding member, applying defaults for options that were not supplied. Problems are
// signalled by setting abort = true, which execute() checks first.
// NOTE(review): the fused listing numbers skip throughout this function, so several lines are not
// visible -- among them the declarations of `path` and `temp` and the else-branches implied by the
// comments below. Confirm the control flow against the complete file before editing.
120 ClearcutCommand::ClearcutCommand(string option) {
122 abort = false; calledHelp = false;
124 //allow user to run help
// help and citation print their text and mark the command as aborted-by-help, which makes
// execute() return 0 instead of 2.
125 if(option == "help") { help(); abort = true; calledHelp = true; }
126 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// Names of all parameters this command accepts.
129 vector<string> myArray = setParameters();
// Parse the option string into a map keyed by parameter name.
131 OptionParser parser(option);
132 map<string,string> parameters = parser.getParameters();
134 ValidParameters validParameter;
135 map<string, string>::iterator it;
137 //check to make sure all parameters are valid for command
138 for (it = parameters.begin(); it != parameters.end(); it++) {
139 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
142 //initialize outputTypes
143 vector<string> tempOutNames;
144 outputTypes["tree"] = tempOutNames;
145 outputTypes["matrixout"] = tempOutNames;
147 //if the user changes the input directory command factory will send this info to us in the output parameter
148 string inputDir = validParameter.validFile(parameters, "inputdir", false);
149 if (inputDir == "not found"){ inputDir = ""; }
// NOTE(review): listing lines 150-151 are not visible; presumably the input-directory handling
// below only runs when inputdir was supplied, and `path` is declared there -- confirm.
152 it = parameters.find("fasta");
153 //user has given a fasta file
154 if(it != parameters.end()){
155 path = m->hasPath(it->second);
156 //if the user has not given a path then, add inputdir. else leave path alone.
157 if (path == "") { parameters["fasta"] = inputDir + it->second; }
160 it = parameters.find("phylip");
161 //user has given a phylip file
162 if(it != parameters.end()){
163 path = m->hasPath(it->second);
164 //if the user has not given a path then, add inputdir. else leave path alone.
165 if (path == "") { parameters["phylip"] = inputDir + it->second; }
169 //check for required parameters
// validFile reports problems through the strings "not open" and "not found": "not open" aborts
// the command, "not found" just means the parameter was not supplied. A usable file becomes
// inputFile and is recorded through m->setFastaFile / m->setPhylipFile.
170 fastafile = validParameter.validFile(parameters, "fasta", true);
171 if (fastafile == "not open") { fastafile = ""; abort = true; }
172 else if (fastafile == "not found") { fastafile = ""; }
173 else { inputFile = fastafile; m->setFastaFile(fastafile); }
175 phylipfile = validParameter.validFile(parameters, "phylip", true);
176 if (phylipfile == "not open") { phylipfile = ""; abort = true; }
177 else if (phylipfile == "not found") { phylipfile = ""; }
178 else { inputFile = phylipfile; m->setPhylipFile(phylipfile); }
// Neither file was supplied: fall back to the current files known to m.
// NOTE(review): the priority described below implies the fasta lookup and the error message sit in
// else-branches, but those lines (listing 185, 188, 190-193) are not visible here -- confirm.
180 if ((phylipfile == "") && (fastafile == "")) {
181 //is there a current file available for either of these?
182 //give priority to phylip, then fasta
183 phylipfile = m->getPhylipFile();
184 if (phylipfile != "") { inputFile = phylipfile; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
186 fastafile = m->getFastaFile();
187 if (fastafile != "") { inputFile = fastafile; m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
189 m->mothurOut("No valid current files. You must provide a phylip or fasta file before you can use the clearcut command."); m->mothurOutEndLine();
// The two input types are mutually exclusive.
194 if ((phylipfile != "") && (fastafile != "")) { m->mothurOut("You must provide either a phylip formatted distance matrix or an aligned fasta file, not BOTH."); m->mothurOutEndLine(); abort=true; }
197 //if the user changes the output directory command factory will send this info to us in the output parameter
// Without an explicit outputdir, output goes next to the input file (hasPath of inputFile).
198 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(inputFile); }
// Option values: each is looked up by name, replaced by its default when validFile returns
// "not found", and -- for the on/off options -- converted with m->isTrue. The declaration of
// `temp` (listing line 200) is not visible here.
201 temp = validParameter.validFile(parameters, "version", false); if (temp == "not found"){ temp = "F"; }
202 version = m->isTrue(temp);
204 temp = validParameter.validFile(parameters, "verbose", false); if (temp == "not found"){ temp = "F"; }
205 verbose = m->isTrue(temp);
207 temp = validParameter.validFile(parameters, "quiet", false); if (temp == "not found"){ temp = "F"; }
208 quiet = m->isTrue(temp);
// seed stays a string; "*" is the value used when no seed was supplied.
210 seed = validParameter.validFile(parameters, "seed", false); if (seed == "not found"){ seed = "*"; }
212 temp = validParameter.validFile(parameters, "norandom", false); if (temp == "not found"){ temp = "F"; }
213 norandom = m->isTrue(temp);
215 temp = validParameter.validFile(parameters, "shuffle", false); if (temp == "not found"){ temp = "F"; }
216 shuffle = m->isTrue(temp);
// neighbor is the only on/off option that defaults to "T".
218 temp = validParameter.validFile(parameters, "neighbor", false); if (temp == "not found"){ temp = "T"; }
219 neighbor = m->isTrue(temp);
221 temp = validParameter.validFile(parameters, "DNA", false); if (temp == "not found"){ temp = "F"; }
222 DNA = m->isTrue(temp);
224 temp = validParameter.validFile(parameters, "protein", false); if (temp == "not found"){ temp = "F"; }
225 protein = m->isTrue(temp);
227 temp = validParameter.validFile(parameters, "jukes", false); if (temp == "not found"){ temp = "F"; }
228 jukes = m->isTrue(temp);
230 temp = validParameter.validFile(parameters, "kimura", false); if (temp == "not found"){ temp = "F"; }
231 kimura = m->isTrue(temp);
233 temp = validParameter.validFile(parameters, "stdout", false); if (temp == "not found"){ temp = "F"; }
234 stdoutWanted = m->isTrue(temp);
// matrixout and ntrees are kept as strings; execute() pastes them into clearcut arguments.
236 matrixout = validParameter.validFile(parameters, "matrixout", false); if (matrixout == "not found"){ matrixout = ""; }
238 ntrees = validParameter.validFile(parameters, "ntrees", false); if (ntrees == "not found"){ ntrees = "1"; }
240 temp = validParameter.validFile(parameters, "expblen", false); if (temp == "not found"){ temp = "F"; }
241 expblen = m->isTrue(temp);
243 temp = validParameter.validFile(parameters, "expdist", false); if (temp == "not found"){ temp = "F"; }
244 expdist = m->isTrue(temp);
// A fasta input needs its sequence type stated explicitly.
246 if ((fastafile != "") && ((!DNA) && (!protein))) { m->mothurOut("You must specify the type of sequences you are using: DNA or protein"); m->mothurOutEndLine(); abort=true; }
250 catch(exception& e) {
// Reports the caught exception together with the class and method name.
251 m->errorOut(e, "ClearcutCommand", "ClearcutCommand");
255 /**************************************************************************************/
// Runs clearcut: builds an argv-style list of C strings from the options read by the constructor,
// calls clearcut_main with it, frees the argument buffers, records the tree file as the current
// tree file and prints the output file names.
// Returns 0 when the command was aborted after help/citation and 2 when it was aborted for any
// other reason; the return for the normal path is not visible in this listing.
// NOTE(review): the fused listing numbers skip throughout this function. Among the lines not
// visible are the declaration of cPara (listing 267) and the statements that open and close the
// scopes around the --seed, --out and --ntrees arguments -- `char* temp` is declared three times
// below, so those statements must sit in separate scopes in the complete file. The conditions
// guarding them cannot be determined from here; confirm before editing.
256 int ClearcutCommand::execute() {
259 if (abort == true) { if (calledHelp) { return 0; } return 2; }
// Output tree name: the "[filename]" placeholder is the output directory plus the root of the
// input file's simple name; the name is recorded in outputNames and under the "tree" type.
262 map<string, string> variables;
263 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFile));
264 string outputName = getOutputFileName("tree", variables);
265 outputNames.push_back(outputName); outputTypes["tree"].push_back(outputName);
// First argument is the program name. Every argument buffer below follows the same recipe:
// allocate length+1 chars, make it an empty string, then strncat the text into it (strncat writes
// the terminating NUL). The buffers are released with delete[] after clearcut_main returns.
269 char* tempClearcut = new char[9];
270 *tempClearcut = '\0'; strncat(tempClearcut, "clearcut", 8);
271 cPara.push_back(tempClearcut);
273 //you gave us a distance matrix
274 if (phylipfile != "") { char* temp = new char[11]; *temp = '\0'; strncat(temp, "--distance", 10); cPara.push_back(temp); }
276 //you gave us a fastafile
277 if (fastafile != "") { char* temp = new char[12]; *temp = '\0'; strncat(temp, "--alignment", 11); cPara.push_back(temp); }
279 if (version) { char* temp = new char[10]; *temp = '\0'; strncat(temp, "--version", 9); cPara.push_back(temp); }
280 if (verbose) { char* temp = new char[10]; *temp = '\0'; strncat(temp, "--verbose", 9); cPara.push_back(temp); }
281 if (quiet) { char* temp = new char[8]; *temp = '\0'; strncat(temp, "--quiet", 7); cPara.push_back(temp); }
// --seed=<value>. NOTE(review): listing lines 282 and 287 are not visible; presumably this is
// skipped when seed still holds the "*" placeholder set by the constructor -- confirm.
283 string tempSeed = "--seed=" + seed;
284 char* temp = new char[tempSeed.length()+1];
285 *temp = '\0'; strncat(temp, tempSeed.c_str(), tempSeed.length());
286 cPara.push_back(temp);
288 if (norandom) { char* temp = new char[11]; *temp = '\0'; strncat(temp, "--norandom", 10); cPara.push_back(temp); }
289 if (shuffle) { char* temp = new char[10]; *temp = '\0'; strncat(temp, "--shuffle", 9); cPara.push_back(temp); }
290 if (neighbor) { char* temp = new char[11]; *temp = '\0'; strncat(temp, "--neighbor", 10); cPara.push_back(temp); }
// --in=<input file>: whichever of phylip/fasta the constructor selected as inputFile.
292 string tempIn = "--in=" + inputFile;
293 char* tempI = new char[tempIn.length()+1];
294 *tempI = '\0'; strncat(tempI, tempIn.c_str(), tempIn.length());
295 cPara.push_back(tempI);
297 if (stdoutWanted) { char* temp = new char[9]; *temp = '\0'; strncat(temp, "--stdout", 8); cPara.push_back(temp); }
// --out=<tree file>. NOTE(review): listing lines 298 and 303 are not visible; presumably this is
// the alternative to --stdout above -- confirm.
299 string tempOut = "--out=" + outputName;
300 char* temp = new char[tempOut.length()+1];
301 *temp = '\0'; strncat(temp, tempOut.c_str(), tempOut.length());
302 cPara.push_back(temp);
305 if (DNA) { char* temp = new char[6]; *temp = '\0'; strncat(temp, "--DNA", 5); cPara.push_back(temp); }
306 if (protein) { char* temp = new char[10]; *temp = '\0'; strncat(temp, "--protein", 9); cPara.push_back(temp); }
307 if (jukes) { char* temp = new char[8]; *temp = '\0'; strncat(temp, "--jukes", 7); cPara.push_back(temp); }
308 if (kimura) { char* temp = new char[9]; *temp = '\0'; strncat(temp, "--kimura", 8); cPara.push_back(temp); }
// The distance matrix is written under outputDir and recorded as a "matrixout" output.
309 if (matrixout != "") {
310 string tempMatrix = "--matrixout=" + outputDir + matrixout;
311 char* temp = new char[tempMatrix.length()+1];
312 *temp = '\0'; strncat(temp, tempMatrix.c_str(), tempMatrix.length());
313 cPara.push_back(temp);
314 outputNames.push_back((outputDir + matrixout));
315 outputTypes["matrixout"].push_back((outputDir + matrixout));
// --ntrees=<value>. NOTE(review): listing lines 318 and 323 are not visible; presumably this is
// skipped when ntrees still holds the default "1" -- confirm.
319 string tempNtrees = "--ntrees=" + ntrees;
320 char* temp = new char[tempNtrees.length()+1];
321 *temp = '\0'; strncat(temp, tempNtrees.c_str(), tempNtrees.length());
322 cPara.push_back(temp);
325 if (expblen) { char* temp = new char[10]; *temp = '\0'; strncat(temp, "--expblen", 9); cPara.push_back(temp); }
326 if (expdist) { char* temp = new char[10]; *temp = '\0'; strncat(temp, "--expdist", 9); cPara.push_back(temp); }
// Copy the argument pointers into a plain char* array, the form clearcut_main takes together
// with the argument count.
328 char** clearcutParameters;
329 clearcutParameters = new char*[cPara.size()];
330 for (int i = 0; i < cPara.size(); i++) { clearcutParameters[i] = cPara[i]; }
331 int numArgs = cPara.size();
333 clearcut_main(numArgs, clearcutParameters);
// Release every argument buffer, then the pointer array itself.
336 for(int i = 0; i < cPara.size(); i++) { delete[] cPara[i]; }
337 delete[] clearcutParameters;
341 //set first tree file as new current treefile
342 string currentTree = "";
343 itTypes = outputTypes.find("tree");
344 if (itTypes != outputTypes.end()) {
345 if ((itTypes->second).size() != 0) { currentTree = (itTypes->second)[0]; m->setTreeFile(currentTree); }
// List every output file that was recorded in outputNames.
348 m->mothurOutEndLine();
349 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
350 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
351 m->mothurOutEndLine();
356 catch(exception& e) {
// Reports the caught exception together with the class and method name.
357 m->errorOut(e, "ClearcutCommand", "execute");
361 /**************************************************************************************/