]> git.donarmstrong.com Git - mothur.git/blob - clusterdoturcommand.cpp
changed random forest output filename
[mothur.git] / clusterdoturcommand.cpp
1 /*
2  *  clusterdoturcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/27/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "clusterdoturcommand.h"
11 #include "clusterclassic.h"
12
13 //**********************************************************************************************************************
14 vector<string> ClusterDoturCommand::setParameters(){    
15         try {
16                 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none","list",false,true,true); parameters.push_back(pphylip);
17                 CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","rabund-sabund",false,false,true); parameters.push_back(pname);
18         CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","",false,false,true); parameters.push_back(pcount);
19                 CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "","",false,false,true); parameters.push_back(pcutoff);
20                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
21                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "","",false,false); parameters.push_back(pmethod);
22                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
23                 CommandParameter psim("sim", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psim);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "ClusterDoturCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string ClusterDoturCommand::getHelpString(){    
38         try {
39                 string helpString = "";
40                 helpString += "The cluster.classic command clusters using the algorithm from dotur. \n";
41                 helpString += "The cluster.classic command parameter options are phylip, name, count, method, cuttoff, hard, sim, precision. Phylip is required, unless you have a valid current file.\n";
42                 helpString += "The cluster.classic command should be in the following format: \n";
43                 helpString += "cluster.classic(phylip=yourDistanceMatrix, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n";
44                 helpString += "The acceptable cluster methods are furthest, nearest, weighted and average.  If no method is provided then average is assumed.\n";       
45                 return helpString;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "ClusterDoturCommand", "getHelpString");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 string ClusterDoturCommand::getOutputPattern(string type) {
54     try {
55         string pattern = "";
56         
57         if (type == "list") {  pattern = "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; } 
58         else if (type == "rabund") {  pattern = "[filename],[clustertag],rabund"; } 
59         else if (type == "sabund") {  pattern = "[filename],[clustertag],sabund"; }
60         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
61         
62         return pattern;
63     }
64     catch(exception& e) {
65         m->errorOut(e, "ClusterDoturCommand", "getOutputPattern");
66         exit(1);
67     }
68 }
69 //**********************************************************************************************************************
70 ClusterDoturCommand::ClusterDoturCommand(){     
71         try {
72                 abort = true; calledHelp = true;
73                 setParameters();
74                 vector<string> tempOutNames;
75                 outputTypes["list"] = tempOutNames;
76                 outputTypes["rabund"] = tempOutNames;
77                 outputTypes["sabund"] = tempOutNames;
78         }
79         catch(exception& e) {
80                 m->errorOut(e, "ClusterDoturCommand", "ClusterCommand");
81                 exit(1);
82         }
83 }
84 //**********************************************************************************************************************
85 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
86 ClusterDoturCommand::ClusterDoturCommand(string option)  {
87         try{
88                 
89                 abort = false; calledHelp = false;   
90                 
91                 //allow user to run help
92                 if(option == "help") { help(); abort = true; calledHelp = true; }
93                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
94                 
95                 else {
96                         vector<string> myArray = setParameters();
97                         
98                         OptionParser parser(option);
99                         map<string,string> parameters = parser.getParameters();
100                         
101                         ValidParameters validParameter;
102                 
103                         //check to make sure all parameters are valid for command
104                         map<string,string>::iterator it;
105                         for (it = parameters.begin(); it != parameters.end(); it++) { 
106                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
107                                         abort = true;
108                                 }
109                         }
110                         
111                         //if the user changes the input directory command factory will send this info to us in the output parameter 
112                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
113                         if (inputDir == "not found"){   inputDir = "";          }
114                         else {
115                                 string path;
116                                 it = parameters.find("phylip");
117                                 //user has given a template file
118                                 if(it != parameters.end()){ 
119                                         path = m->hasPath(it->second);
120                                         //if the user has not given a path then, add inputdir. else leave path alone.
121                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
122                                 }
123                                 
124                                 it = parameters.find("name");
125                                 //user has given a template file
126                                 if(it != parameters.end()){ 
127                                         path = m->hasPath(it->second);
128                                         //if the user has not given a path then, add inputdir. else leave path alone.
129                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
130                                 }
131                 
132                 it = parameters.find("count");
133                                 //user has given a template file
134                                 if(it != parameters.end()){ 
135                                         path = m->hasPath(it->second);
136                                         //if the user has not given a path then, add inputdir. else leave path alone.
137                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
138                                 }
139                         }
140                         
141                         //initialize outputTypes
142                         vector<string> tempOutNames;
143                         outputTypes["list"] = tempOutNames;
144                         outputTypes["rabund"] = tempOutNames;
145                         outputTypes["sabund"] = tempOutNames;
146                 
147                         //if the user changes the output directory command factory will send this info to us in the output parameter 
148                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
149                         
150                         //check for required parameters
151                         phylipfile = validParameter.validFile(parameters, "phylip", true);
152                         if (phylipfile == "not open") { abort = true; }
153                         else if (phylipfile == "not found") { 
154                                 phylipfile = m->getPhylipFile(); 
155                                 if (phylipfile != "") {  m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
156                                 else { 
157                                         m->mothurOut("You need to provide a phylip file with the cluster.classic command."); m->mothurOutEndLine(); 
158                                         abort = true; 
159                                 }       
160                         }else { m->setPhylipFile(phylipfile); } 
161
162                 
163                         //check for optional parameter and set defaults
164                         namefile = validParameter.validFile(parameters, "name", true);
165                         if (namefile == "not open") { abort = true; namefile = ""; }    
166                         else if (namefile == "not found") { namefile = ""; }
167                         else { m->setNameFile(namefile); }
168             
169             countfile = validParameter.validFile(parameters, "count", true);
170                         if (countfile == "not open") { abort = true; countfile = ""; }  
171                         else if (countfile == "not found") { countfile = ""; }
172                         else { m->setCountTableFile(countfile); }
173                         
174             if ((countfile != "") && (namefile != "")) { m->mothurOut("When executing a cluster.classic command you must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
175             
176                         string temp;
177                         temp = validParameter.validFile(parameters, "precision", false);
178                         if (temp == "not found") { temp = "100"; }
179                         //saves precision legnth for formatting below
180                         length = temp.length();
181                         m->mothurConvert(temp, precision); 
182                         
183                         temp = validParameter.validFile(parameters, "cutoff", false);
184                         if (temp == "not found") { temp = "10"; }
185                         m->mothurConvert(temp, cutoff); 
186                         cutoff += (5 / (precision * 10.0));  
187                         
188                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
189                         hard = m->isTrue(temp);
190                         
191                         temp = validParameter.validFile(parameters, "sim", false);                              if (temp == "not found") { temp = "F"; }
192                         sim = m->isTrue(temp); 
193                         
194                         method = validParameter.validFile(parameters, "method", false);
195                         if (method == "not found") { method = "average"; }
196                         
197                         if ((method == "furthest") || (method == "nearest") || (method == "average") || (method == "weighted")) { 
198                                 if (method == "furthest") { tag = "fn"; }
199                                 else if (method == "nearest") { tag = "nn"; }
200                                 else if (method == "average") { tag = "an"; }
201                                 else if (method == "weighted") { tag = "wn"; }
202                         }else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest, average, weighted."); m->mothurOutEndLine(); abort = true; }
203                 }
204         }
205         catch(exception& e) {
206                 m->errorOut(e, "ClusterDoturCommand", "ClusterCommand");
207                 exit(1);
208         }
209 }
210 //**********************************************************************************************************************
211
212 int ClusterDoturCommand::execute(){
213         try {
214         
215                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
216                 
217         
218         ClusterClassic* cluster = new ClusterClassic(cutoff, method, sim);
219         
220         NameAssignment* nameMap = NULL;
221         CountTable* ct = NULL;
222         if(namefile != "") {    
223                         nameMap = new NameAssignment(namefile);
224                         nameMap->readMap();
225             cluster->readPhylipFile(phylipfile, nameMap);
226             delete nameMap;
227                 }else if (countfile != "") {
228             ct = new CountTable();
229             ct->readTable(countfile, false);
230             cluster->readPhylipFile(phylipfile, ct);
231             delete ct;
232         }else {
233             cluster->readPhylipFile(phylipfile, nameMap);
234         }
235         tag = cluster->getTag();
236         
237                 if (m->control_pressed) { delete cluster; return 0; }
238                 
239                 list = cluster->getListVector();
240                 rabund = cluster->getRAbundVector();
241                                                                 
242                 if (outputDir == "") { outputDir += m->hasPath(phylipfile); }
243                 fileroot = outputDir + m->getRootName(m->getSimpleName(phylipfile));
244                         
245         map<string, string> variables; 
246         variables["[filename]"] = fileroot;
247         variables["[clustertag]"] = tag;
248         string sabundFileName = getOutputFileName("sabund", variables);
249         string rabundFileName = getOutputFileName("rabund", variables);
250         if (countfile != "") { variables["[tag2]"] = "unique_list"; }
251         string listFileName = getOutputFileName("list", variables);
252         
253         if (countfile == "") {
254             m->openOutputFile(sabundFileName,   sabundFile);
255             m->openOutputFile(rabundFileName,   rabundFile);
256             outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
257             outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
258             
259         }
260                 m->openOutputFile(listFileName, listFile);
261         outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
262                 
263                 float previousDist = 0.00000;
264                 float rndPreviousDist = 0.00000;
265                 oldRAbund = *rabund;
266                 oldList = *list;
267
268                 //double saveCutoff = cutoff;
269                 
270                 int estart = time(NULL);
271         
272                 while ((cluster->getSmallDist() < cutoff) && (cluster->getNSeqs() > 1)){
273                         if (m->control_pressed) { delete cluster; delete list; delete rabund; if(countfile == "") {rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); }
274                 listFile.close(); m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear();  return 0;  }
275                 
276                         cluster->update(cutoff);
277         
278                         float dist = cluster->getSmallDist();
279                         float rndDist;
280                         if (hard) {
281                                 rndDist = m->ceilDist(dist, precision); 
282                         }else{
283                                 rndDist = m->roundDist(dist, precision); 
284                         }
285
286                         if(previousDist <= 0.0000 && dist != previousDist){
287                                 printData("unique");
288                         }
289                         else if(rndDist != rndPreviousDist){
290                                 printData(toString(rndPreviousDist,  length-1));
291                         }
292                 
293                         previousDist = dist;
294                         rndPreviousDist = rndDist;
295                         oldRAbund = *rabund;
296                         oldList = *list;
297                 }
298         
299                 if(previousDist <= 0.0000){
300                         printData("unique");
301                 }
302                 else if(rndPreviousDist<cutoff){
303                         printData(toString(rndPreviousDist, length-1));
304                 }
305                 
306         if (countfile == "") {
307             sabundFile.close();
308             rabundFile.close();
309         }
310                 listFile.close();
311                 
312                 delete cluster;  delete list; delete rabund;
313                 
314                 //set list file as new current listfile
315                 string current = "";
316                 itTypes = outputTypes.find("list");
317                 if (itTypes != outputTypes.end()) {
318                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
319                 }
320                 
321                 //set rabund file as new current rabundfile
322                 itTypes = outputTypes.find("rabund");
323                 if (itTypes != outputTypes.end()) {
324                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
325                 }
326                 
327                 //set sabund file as new current sabundfile
328                 itTypes = outputTypes.find("sabund");
329                 if (itTypes != outputTypes.end()) {
330                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
331                 }
332                 
333                 m->mothurOutEndLine();
334                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
335                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
336                 m->mothurOutEndLine();
337
338                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
339
340                 return 0;
341         }
342         catch(exception& e) {
343                 m->errorOut(e, "ClusterDoturCommand", "execute");
344                 exit(1);
345         }
346 }
347
348 //**********************************************************************************************************************
349
350 void ClusterDoturCommand::printData(string label){
351         try {
352         oldRAbund.setLabel(label);
353         if (countfile == "") {
354             oldRAbund.print(rabundFile);
355             oldRAbund.getSAbundVector().print(sabundFile);
356         }
357
358                 oldRAbund.getSAbundVector().print(cout);
359                 
360                 oldList.setLabel(label);
361                 oldList.print(listFile);
362         }
363         catch(exception& e) {
364                 m->errorOut(e, "ClusterDoturCommand", "printData");
365                 exit(1);
366         }
367 }
368 //**********************************************************************************************************************