]> git.donarmstrong.com Git - mothur.git/blob - clusterdoturcommand.cpp
1.18.0
[mothur.git] / clusterdoturcommand.cpp
1 /*
2  *  clusterdoturcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/27/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "clusterdoturcommand.h"
11 #include "clusterclassic.h"
12
13 //**********************************************************************************************************************
14 vector<string> ClusterDoturCommand::setParameters(){    
15         try {
16                 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pphylip);
17                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
18                 CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
19                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
20                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
21                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
22                 CommandParameter psim("sim", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psim);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25                 
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "ClusterDoturCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string ClusterDoturCommand::getHelpString(){    
37         try {
38                 string helpString = "";
39                 helpString += "The cluster.classic command clusters using the algorithm from dotur. \n";
40                 helpString += "The cluster.classic command parameter options are phylip, name, method, cuttoff, hard, sim, precision. Phylip is required, unless you have a valid current file.\n";
41                 helpString += "The cluster.classic command should be in the following format: \n";
42                 helpString += "cluster.classic(phylip=yourDistanceMatrix, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n";
43                 helpString += "The acceptable cluster methods are furthest, nearest, weighted and average.  If no method is provided then average is assumed.\n";       
44                 return helpString;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "ClusterDoturCommand", "getHelpString");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 ClusterDoturCommand::ClusterDoturCommand(){     
53         try {
54                 abort = true; calledHelp = true;
55                 setParameters();
56                 vector<string> tempOutNames;
57                 outputTypes["list"] = tempOutNames;
58                 outputTypes["rabund"] = tempOutNames;
59                 outputTypes["sabund"] = tempOutNames;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "ClusterDoturCommand", "ClusterCommand");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
68 ClusterDoturCommand::ClusterDoturCommand(string option)  {
69         try{
70                 
71                 abort = false; calledHelp = false;   
72                 
73                 //allow user to run help
74                 if(option == "help") { help(); abort = true; calledHelp = true; }
75                 
76                 else {
77                         vector<string> myArray = setParameters();
78                         
79                         OptionParser parser(option);
80                         map<string,string> parameters = parser.getParameters();
81                         
82                         ValidParameters validParameter;
83                 
84                         //check to make sure all parameters are valid for command
85                         map<string,string>::iterator it;
86                         for (it = parameters.begin(); it != parameters.end(); it++) { 
87                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
88                                         abort = true;
89                                 }
90                         }
91                         
92                         //if the user changes the input directory command factory will send this info to us in the output parameter 
93                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
94                         if (inputDir == "not found"){   inputDir = "";          }
95                         else {
96                                 string path;
97                                 it = parameters.find("phylip");
98                                 //user has given a template file
99                                 if(it != parameters.end()){ 
100                                         path = m->hasPath(it->second);
101                                         //if the user has not given a path then, add inputdir. else leave path alone.
102                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
103                                 }
104                                 
105                                 it = parameters.find("name");
106                                 //user has given a template file
107                                 if(it != parameters.end()){ 
108                                         path = m->hasPath(it->second);
109                                         //if the user has not given a path then, add inputdir. else leave path alone.
110                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
111                                 }
112
113                         }
114                         
115                         //initialize outputTypes
116                         vector<string> tempOutNames;
117                         outputTypes["list"] = tempOutNames;
118                         outputTypes["rabund"] = tempOutNames;
119                         outputTypes["sabund"] = tempOutNames;
120                 
121                         //if the user changes the output directory command factory will send this info to us in the output parameter 
122                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
123                         
124                         //check for required parameters
125                         phylipfile = validParameter.validFile(parameters, "phylip", true);
126                         if (phylipfile == "not open") { abort = true; }
127                         else if (phylipfile == "not found") { 
128                                 phylipfile = m->getPhylipFile(); 
129                                 if (phylipfile != "") {  m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
130                                 else { 
131                                         m->mothurOut("You need to provide a phylip file with the cluster.classic command."); m->mothurOutEndLine(); 
132                                         abort = true; 
133                                 }       
134                         }       
135
136                 
137                         //check for optional parameter and set defaults
138                         namefile = validParameter.validFile(parameters, "name", true);
139                         if (namefile == "not open") { abort = true; }   
140                         else if (namefile == "not found") { namefile = ""; }
141                         
142                         string temp;
143                         temp = validParameter.validFile(parameters, "precision", false);
144                         if (temp == "not found") { temp = "100"; }
145                         //saves precision legnth for formatting below
146                         length = temp.length();
147                         convert(temp, precision); 
148                         
149                         temp = validParameter.validFile(parameters, "cutoff", false);
150                         if (temp == "not found") { temp = "10"; }
151                         convert(temp, cutoff); 
152                         cutoff += (5 / (precision * 10.0));  
153                         
154                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
155                         hard = m->isTrue(temp);
156                         
157                         temp = validParameter.validFile(parameters, "sim", false);                              if (temp == "not found") { temp = "F"; }
158                         sim = m->isTrue(temp); 
159                         
160                         method = validParameter.validFile(parameters, "method", false);
161                         if (method == "not found") { method = "average"; }
162                         
163                         if ((method == "furthest") || (method == "nearest") || (method == "average") || (method == "weighted")) { 
164                                 if (method == "furthest") { tag = "fn"; }
165                                 else if (method == "nearest") { tag = "nn"; }
166                                 else if (method == "average") { tag = "an"; }
167                                 else if (method == "weighted") { tag = "wn"; }
168                         }else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest, average, weighted."); m->mothurOutEndLine(); abort = true; }
169                 }
170         }
171         catch(exception& e) {
172                 m->errorOut(e, "ClusterDoturCommand", "ClusterCommand");
173                 exit(1);
174         }
175 }
176 //**********************************************************************************************************************
177
178 int ClusterDoturCommand::execute(){
179         try {
180         
181                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
182                 
183                 if(namefile != ""){     
184                         nameMap = new NameAssignment(namefile);
185                         nameMap->readMap();
186                 }else{
187                         nameMap = NULL;
188                 }
189                 
190                 //reads phylip file storing data in 2D vector, also fills list and rabund
191                 ClusterClassic* cluster = new ClusterClassic(cutoff, method, sim);
192                 cluster->readPhylipFile(phylipfile, nameMap);
193                 
194                 if (m->control_pressed) { delete cluster; delete list; delete rabund; return 0; }
195                 
196                 list = cluster->getListVector();
197                 rabund = cluster->getRAbundVector();
198                                                 
199                 if (outputDir == "") { outputDir += m->hasPath(phylipfile); }
200                 fileroot = outputDir + m->getRootName(m->getSimpleName(phylipfile));
201                         
202                 m->openOutputFile(fileroot+ tag + ".sabund",    sabundFile);
203                 m->openOutputFile(fileroot+ tag + ".rabund",    rabundFile);
204                 m->openOutputFile(fileroot+ tag + ".list",              listFile);
205                                 
206                 outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
207                 outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
208                 outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
209                 
210                 float previousDist = 0.00000;
211                 float rndPreviousDist = 0.00000;
212                 oldRAbund = *rabund;
213                 oldList = *list;
214
215                 //double saveCutoff = cutoff;
216                 
217                 int estart = time(NULL);
218         
219                 while ((cluster->getSmallDist() < cutoff) && (cluster->getNSeqs() > 1)){
220                         if (m->control_pressed) { delete cluster; delete list; delete rabund; sabundFile.close();rabundFile.close();listFile.close();  for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str());         } outputTypes.clear();  return 0;  }
221                 
222                         cluster->update(cutoff);
223         
224                         float dist = cluster->getSmallDist();
225                         float rndDist;
226                         if (hard) {
227                                 rndDist = m->ceilDist(dist, precision); 
228                         }else{
229                                 rndDist = m->roundDist(dist, precision); 
230                         }
231
232                         if(previousDist <= 0.0000 && dist != previousDist){
233                                 printData("unique");
234                         }
235                         else if(rndDist != rndPreviousDist){
236                                 printData(toString(rndPreviousDist,  length-1));
237                         }
238                 
239                         previousDist = dist;
240                         rndPreviousDist = rndDist;
241                         oldRAbund = *rabund;
242                         oldList = *list;
243                 }
244         
245                 if(previousDist <= 0.0000){
246                         printData("unique");
247                 }
248                 else if(rndPreviousDist<cutoff){
249                         printData(toString(rndPreviousDist, length-1));
250                 }
251                                         
252                 sabundFile.close();
253                 rabundFile.close();
254                 listFile.close();
255                 
256                 delete cluster; delete nameMap; delete list; delete rabund;
257         
258                 //if (saveCutoff != cutoff) { 
259                 //      if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
260                 //      else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
261                 //      m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
262                 //}
263                 
264                 //set list file as new current listfile
265                 string current = "";
266                 itTypes = outputTypes.find("list");
267                 if (itTypes != outputTypes.end()) {
268                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
269                 }
270                 
271                 //set rabund file as new current rabundfile
272                 itTypes = outputTypes.find("rabund");
273                 if (itTypes != outputTypes.end()) {
274                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
275                 }
276                 
277                 //set sabund file as new current sabundfile
278                 itTypes = outputTypes.find("sabund");
279                 if (itTypes != outputTypes.end()) {
280                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
281                 }
282                 
283                 m->mothurOutEndLine();
284                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
285                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
286                 m->mothurOutEndLine();
287
288                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
289
290                 return 0;
291         }
292         catch(exception& e) {
293                 m->errorOut(e, "ClusterDoturCommand", "execute");
294                 exit(1);
295         }
296 }
297
298 //**********************************************************************************************************************
299
300 void ClusterDoturCommand::printData(string label){
301         try {
302         
303                 oldRAbund.setLabel(label);
304                 oldRAbund.print(rabundFile);
305                 oldRAbund.getSAbundVector().print(sabundFile);
306                 
307                 oldRAbund.getSAbundVector().print(cout);
308                 
309                 oldList.setLabel(label);
310                 oldList.print(listFile);
311         }
312         catch(exception& e) {
313                 m->errorOut(e, "ClusterDoturCommand", "printData");
314                 exit(1);
315         }
316 }
317 //**********************************************************************************************************************