]> git.donarmstrong.com Git - mothur.git/blob - clusterdoturcommand.cpp
changes while testing 1.27
[mothur.git] / clusterdoturcommand.cpp
1 /*
2  *  clusterdoturcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/27/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "clusterdoturcommand.h"
11 #include "clusterclassic.h"
12
13 //**********************************************************************************************************************
14 vector<string> ClusterDoturCommand::setParameters(){    
15         try {
16                 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pphylip);
17                 CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
18         CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
19                 CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
20                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
21                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
22                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
23                 CommandParameter psim("sim", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psim);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "ClusterDoturCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string ClusterDoturCommand::getHelpString(){    
38         try {
39                 string helpString = "";
40                 helpString += "The cluster.classic command clusters using the algorithm from dotur. \n";
41                 helpString += "The cluster.classic command parameter options are phylip, name, count, method, cuttoff, hard, sim, precision. Phylip is required, unless you have a valid current file.\n";
42                 helpString += "The cluster.classic command should be in the following format: \n";
43                 helpString += "cluster.classic(phylip=yourDistanceMatrix, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n";
44                 helpString += "The acceptable cluster methods are furthest, nearest, weighted and average.  If no method is provided then average is assumed.\n";       
45                 return helpString;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "ClusterDoturCommand", "getHelpString");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 string ClusterDoturCommand::getOutputFileNameTag(string type, string inputName=""){     
54         try {
55         string outputFileName = "";
56                 map<string, vector<string> >::iterator it;
57         
58         //is this a type this command creates
59         it = outputTypes.find(type);
60         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
61         else {
62             if (type == "list") {  outputFileName =  "list"; }
63             else if (type == "rabund") {  outputFileName =  "rabund"; }
64             else if (type == "sabund") {  outputFileName =  "sabund"; }
65             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
66         }
67         return outputFileName;
68         }
69         catch(exception& e) {
70                 m->errorOut(e, "ClusterDoturCommand", "getOutputFileNameTag");
71                 exit(1);
72         }
73 }
74 //**********************************************************************************************************************
75 ClusterDoturCommand::ClusterDoturCommand(){     
76         try {
77                 abort = true; calledHelp = true;
78                 setParameters();
79                 vector<string> tempOutNames;
80                 outputTypes["list"] = tempOutNames;
81                 outputTypes["rabund"] = tempOutNames;
82                 outputTypes["sabund"] = tempOutNames;
83         }
84         catch(exception& e) {
85                 m->errorOut(e, "ClusterDoturCommand", "ClusterCommand");
86                 exit(1);
87         }
88 }
89 //**********************************************************************************************************************
90 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
91 ClusterDoturCommand::ClusterDoturCommand(string option)  {
92         try{
93                 
94                 abort = false; calledHelp = false;   
95                 
96                 //allow user to run help
97                 if(option == "help") { help(); abort = true; calledHelp = true; }
98                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
99                 
100                 else {
101                         vector<string> myArray = setParameters();
102                         
103                         OptionParser parser(option);
104                         map<string,string> parameters = parser.getParameters();
105                         
106                         ValidParameters validParameter;
107                 
108                         //check to make sure all parameters are valid for command
109                         map<string,string>::iterator it;
110                         for (it = parameters.begin(); it != parameters.end(); it++) { 
111                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
112                                         abort = true;
113                                 }
114                         }
115                         
116                         //if the user changes the input directory command factory will send this info to us in the output parameter 
117                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
118                         if (inputDir == "not found"){   inputDir = "";          }
119                         else {
120                                 string path;
121                                 it = parameters.find("phylip");
122                                 //user has given a template file
123                                 if(it != parameters.end()){ 
124                                         path = m->hasPath(it->second);
125                                         //if the user has not given a path then, add inputdir. else leave path alone.
126                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
127                                 }
128                                 
129                                 it = parameters.find("name");
130                                 //user has given a template file
131                                 if(it != parameters.end()){ 
132                                         path = m->hasPath(it->second);
133                                         //if the user has not given a path then, add inputdir. else leave path alone.
134                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
135                                 }
136                 
137                 it = parameters.find("count");
138                                 //user has given a template file
139                                 if(it != parameters.end()){ 
140                                         path = m->hasPath(it->second);
141                                         //if the user has not given a path then, add inputdir. else leave path alone.
142                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
143                                 }
144                         }
145                         
146                         //initialize outputTypes
147                         vector<string> tempOutNames;
148                         outputTypes["list"] = tempOutNames;
149                         outputTypes["rabund"] = tempOutNames;
150                         outputTypes["sabund"] = tempOutNames;
151                 
152                         //if the user changes the output directory command factory will send this info to us in the output parameter 
153                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
154                         
155                         //check for required parameters
156                         phylipfile = validParameter.validFile(parameters, "phylip", true);
157                         if (phylipfile == "not open") { abort = true; }
158                         else if (phylipfile == "not found") { 
159                                 phylipfile = m->getPhylipFile(); 
160                                 if (phylipfile != "") {  m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
161                                 else { 
162                                         m->mothurOut("You need to provide a phylip file with the cluster.classic command."); m->mothurOutEndLine(); 
163                                         abort = true; 
164                                 }       
165                         }else { m->setPhylipFile(phylipfile); } 
166
167                 
168                         //check for optional parameter and set defaults
169                         namefile = validParameter.validFile(parameters, "name", true);
170                         if (namefile == "not open") { abort = true; namefile = ""; }    
171                         else if (namefile == "not found") { namefile = ""; }
172                         else { m->setNameFile(namefile); }
173             
174             countfile = validParameter.validFile(parameters, "count", true);
175                         if (countfile == "not open") { abort = true; countfile = ""; }  
176                         else if (countfile == "not found") { countfile = ""; }
177                         else { m->setCountTableFile(countfile); }
178                         
179             if ((countfile != "") && (namefile != "")) { m->mothurOut("When executing a cluster.classic command you must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
180             
181                         string temp;
182                         temp = validParameter.validFile(parameters, "precision", false);
183                         if (temp == "not found") { temp = "100"; }
184                         //saves precision legnth for formatting below
185                         length = temp.length();
186                         m->mothurConvert(temp, precision); 
187                         
188                         temp = validParameter.validFile(parameters, "cutoff", false);
189                         if (temp == "not found") { temp = "10"; }
190                         m->mothurConvert(temp, cutoff); 
191                         cutoff += (5 / (precision * 10.0));  
192                         
193                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
194                         hard = m->isTrue(temp);
195                         
196                         temp = validParameter.validFile(parameters, "sim", false);                              if (temp == "not found") { temp = "F"; }
197                         sim = m->isTrue(temp); 
198                         
199                         method = validParameter.validFile(parameters, "method", false);
200                         if (method == "not found") { method = "average"; }
201                         
202                         if ((method == "furthest") || (method == "nearest") || (method == "average") || (method == "weighted")) { 
203                                 if (method == "furthest") { tag = "fn"; }
204                                 else if (method == "nearest") { tag = "nn"; }
205                                 else if (method == "average") { tag = "an"; }
206                                 else if (method == "weighted") { tag = "wn"; }
207                         }else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest, average, weighted."); m->mothurOutEndLine(); abort = true; }
208                 }
209         }
210         catch(exception& e) {
211                 m->errorOut(e, "ClusterDoturCommand", "ClusterCommand");
212                 exit(1);
213         }
214 }
215 //**********************************************************************************************************************
216
217 int ClusterDoturCommand::execute(){
218         try {
219         
220                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
221                 
222         
223         ClusterClassic* cluster = new ClusterClassic(cutoff, method, sim);
224         
225         NameAssignment* nameMap = NULL;
226         CountTable* ct = NULL;
227         if(namefile != "") {    
228                         nameMap = new NameAssignment(namefile);
229                         nameMap->readMap();
230             cluster->readPhylipFile(phylipfile, nameMap);
231             delete nameMap;
232                 }else if (countfile != "") {
233             ct = new CountTable();
234             ct->readTable(countfile);
235             cluster->readPhylipFile(phylipfile, ct);
236             delete ct;
237         }else {
238             cluster->readPhylipFile(phylipfile, nameMap);
239         }
240         tag = cluster->getTag();
241         
242                 if (m->control_pressed) { delete cluster; return 0; }
243                 
244                 list = cluster->getListVector();
245                 rabund = cluster->getRAbundVector();
246                                                                 
247                 if (outputDir == "") { outputDir += m->hasPath(phylipfile); }
248                 fileroot = outputDir + m->getRootName(m->getSimpleName(phylipfile));
249                         
250         string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
251         string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
252         string listFileName = fileroot+ tag + ".";
253         if (countfile != "") { listFileName += "unique_"; }
254         listFileName += getOutputFileNameTag("list");
255         
256         if (countfile == "") {
257             m->openOutputFile(sabundFileName,   sabundFile);
258             m->openOutputFile(rabundFileName,   rabundFile);
259             outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
260             outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
261             
262         }
263                 m->openOutputFile(listFileName, listFile);
264         outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
265                 
266                 float previousDist = 0.00000;
267                 float rndPreviousDist = 0.00000;
268                 oldRAbund = *rabund;
269                 oldList = *list;
270
271                 //double saveCutoff = cutoff;
272                 
273                 int estart = time(NULL);
274         
275                 while ((cluster->getSmallDist() < cutoff) && (cluster->getNSeqs() > 1)){
276                         if (m->control_pressed) { delete cluster; delete list; delete rabund; if(countfile == "") {rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); }
277                 listFile.close(); m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear();  return 0;  }
278                 
279                         cluster->update(cutoff);
280         
281                         float dist = cluster->getSmallDist();
282                         float rndDist;
283                         if (hard) {
284                                 rndDist = m->ceilDist(dist, precision); 
285                         }else{
286                                 rndDist = m->roundDist(dist, precision); 
287                         }
288
289                         if(previousDist <= 0.0000 && dist != previousDist){
290                                 printData("unique");
291                         }
292                         else if(rndDist != rndPreviousDist){
293                                 printData(toString(rndPreviousDist,  length-1));
294                         }
295                 
296                         previousDist = dist;
297                         rndPreviousDist = rndDist;
298                         oldRAbund = *rabund;
299                         oldList = *list;
300                 }
301         
302                 if(previousDist <= 0.0000){
303                         printData("unique");
304                 }
305                 else if(rndPreviousDist<cutoff){
306                         printData(toString(rndPreviousDist, length-1));
307                 }
308                 
309         if (countfile == "") {
310             sabundFile.close();
311             rabundFile.close();
312         }
313                 listFile.close();
314                 
315                 delete cluster;  delete list; delete rabund;
316                 
317                 //set list file as new current listfile
318                 string current = "";
319                 itTypes = outputTypes.find("list");
320                 if (itTypes != outputTypes.end()) {
321                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
322                 }
323                 
324                 //set rabund file as new current rabundfile
325                 itTypes = outputTypes.find("rabund");
326                 if (itTypes != outputTypes.end()) {
327                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
328                 }
329                 
330                 //set sabund file as new current sabundfile
331                 itTypes = outputTypes.find("sabund");
332                 if (itTypes != outputTypes.end()) {
333                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
334                 }
335                 
336                 m->mothurOutEndLine();
337                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
338                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
339                 m->mothurOutEndLine();
340
341                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
342
343                 return 0;
344         }
345         catch(exception& e) {
346                 m->errorOut(e, "ClusterDoturCommand", "execute");
347                 exit(1);
348         }
349 }
350
351 //**********************************************************************************************************************
352
353 void ClusterDoturCommand::printData(string label){
354         try {
355         oldRAbund.setLabel(label);
356         if (countfile == "") {
357             oldRAbund.print(rabundFile);
358             oldRAbund.getSAbundVector().print(sabundFile);
359         }
360
361                 oldRAbund.getSAbundVector().print(cout);
362                 
363                 oldList.setLabel(label);
364                 oldList.print(listFile);
365         }
366         catch(exception& e) {
367                 m->errorOut(e, "ClusterDoturCommand", "printData");
368                 exit(1);
369         }
370 }
371 //**********************************************************************************************************************