]> git.donarmstrong.com Git - mothur.git/blob - clusterdoturcommand.cpp
added cluster.classic command
[mothur.git] / clusterdoturcommand.cpp
1 /*
2  *  clusterdoturcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/27/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "clusterdoturcommand.h"
11 #include "clusterclassic.h"
12
13 //**********************************************************************************************************************
14 vector<string> ClusterDoturCommand::getValidParameters(){       
15         try {
16                 string AlignArray[] =  {"phylip","name","hard","cutoff","precision","method","outputdir","inputdir"};
17                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
18                 return myArray;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "ClusterDoturCommand", "getValidParameters");
22                 exit(1);
23         }
24 }
25 //**********************************************************************************************************************
26 ClusterDoturCommand::ClusterDoturCommand(){     
27         try {
28                 abort = true;
29                 //initialize outputTypes
30                 vector<string> tempOutNames;
31                 outputTypes["list"] = tempOutNames;
32                 outputTypes["rabund"] = tempOutNames;
33                 outputTypes["sabund"] = tempOutNames;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "ClusterDoturCommand", "ClusterCommand");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 vector<string> ClusterDoturCommand::getRequiredParameters(){    
42         try {
43                 string Array[] =  {"phylip"};
44                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "ClusterDoturCommand", "getRequiredParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 vector<string> ClusterDoturCommand::getRequiredFiles(){ 
54         try {
55                 vector<string> myArray; 
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "ClusterDoturCommand", "getRequiredFiles");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
65 ClusterDoturCommand::ClusterDoturCommand(string option)  {
66         try{
67                 
68                 abort = false;
69                 
70                 //allow user to run help
71                 if(option == "help") { help(); abort = true; }
72                 
73                 else {
74                         //valid paramters for this command
75                         string Array[] =  {"phylip","name","cutoff","hard","precision","method","outputdir","inputdir"};
76                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
77                         
78                         OptionParser parser(option);
79                         map<string,string> parameters = parser.getParameters();
80                         
81                         ValidParameters validParameter;
82                 
83                         //check to make sure all parameters are valid for command
84                         map<string,string>::iterator it;
85                         for (it = parameters.begin(); it != parameters.end(); it++) { 
86                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
87                                         abort = true;
88                                 }
89                         }
90                         
91                         //if the user changes the input directory command factory will send this info to us in the output parameter 
92                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
93                         if (inputDir == "not found"){   inputDir = "";          }
94                         else {
95                                 string path;
96                                 it = parameters.find("phylip");
97                                 //user has given a template file
98                                 if(it != parameters.end()){ 
99                                         path = m->hasPath(it->second);
100                                         //if the user has not given a path then, add inputdir. else leave path alone.
101                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
102                                 }
103                                 
104                                 it = parameters.find("name");
105                                 //user has given a template file
106                                 if(it != parameters.end()){ 
107                                         path = m->hasPath(it->second);
108                                         //if the user has not given a path then, add inputdir. else leave path alone.
109                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
110                                 }
111
112                         }
113                         
114                         //initialize outputTypes
115                         vector<string> tempOutNames;
116                         outputTypes["list"] = tempOutNames;
117                         outputTypes["rabund"] = tempOutNames;
118                         outputTypes["sabund"] = tempOutNames;
119                 
120                         //if the user changes the output directory command factory will send this info to us in the output parameter 
121                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
122                         
123                         //check for required parameters
124                         phylipfile = validParameter.validFile(parameters, "phylip", true);
125                         if (phylipfile == "not open") { abort = true; }
126                         else if (phylipfile == "not found") { phylipfile = ""; m->mothurOut("When executing the cluster.dotur command you must enter a phylip file."); m->mothurOutEndLine(); abort = true; }   
127
128                 
129                         //check for optional parameter and set defaults
130                         namefile = validParameter.validFile(parameters, "name", true);
131                         if (namefile == "not open") { abort = true; }   
132                         else if (namefile == "not found") { namefile = ""; }
133                         
134                         string temp;
135                         temp = validParameter.validFile(parameters, "precision", false);
136                         if (temp == "not found") { temp = "100"; }
137                         //saves precision legnth for formatting below
138                         length = temp.length();
139                         convert(temp, precision); 
140                         
141                         temp = validParameter.validFile(parameters, "cutoff", false);
142                         if (temp == "not found") { temp = "10"; }
143                         convert(temp, cutoff); 
144                         cutoff += (5 / (precision * 10.0));  
145                         
146                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
147                         hard = m->isTrue(temp);
148                         
149                         method = validParameter.validFile(parameters, "method", false);
150                         if (method == "not found") { method = "furthest"; }
151                         
152                         if ((method == "furthest") || (method == "nearest") || (method == "average") || (method == "weighted")) { 
153                                 if (method == "furthest") { tag = "fn"; }
154                                 else if (method == "nearest") { tag = "nn"; }
155                                 else if (method == "average") { tag = "an"; }
156                                 else if (method == "weighted") { tag = "wn"; }
157                         }else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest, average, weighted."); m->mothurOutEndLine(); abort = true; }
158                 }
159         }
160         catch(exception& e) {
161                 m->errorOut(e, "ClusterDoturCommand", "ClusterCommand");
162                 exit(1);
163         }
164 }
165
166 //**********************************************************************************************************************
167
168 void ClusterDoturCommand::help(){
169         try {
170                 m->mothurOut("The cluster.classic command clusters using the algorithm from dotur. \n");
171                 m->mothurOut("The cluster.classic command parameter options are phylip, name, method, cuttoff, hard, precision. Phylip is required.\n");
172                 m->mothurOut("The cluster.classic command should be in the following format: \n");
173                 m->mothurOut("cluster.classic(phylip=yourDistanceMatrix, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n");
174                 m->mothurOut("The acceptable cluster methods are furthest, nearest, weighted and average.  If no method is provided then furthest is assumed.\n\n");    
175
176         }
177         catch(exception& e) {
178                 m->errorOut(e, "ClusterDoturCommand", "help");
179                 exit(1);
180         }
181 }
182
183 //**********************************************************************************************************************
184
185 ClusterDoturCommand::~ClusterDoturCommand(){}
186
187 //**********************************************************************************************************************
188
189 int ClusterDoturCommand::execute(){
190         try {
191         
192                 if (abort == true) {    return 0;       }
193                 
194                 if(namefile != ""){     
195                         nameMap = new NameAssignment(namefile);
196                         nameMap->readMap();
197                 }else{
198                         nameMap = NULL;
199                 }
200                 
201                 //reads phylip file storing data in 2D vector, also fills list and rabund
202                 ClusterClassic* cluster = new ClusterClassic(cutoff, method);
203                 cluster->readPhylipFile(phylipfile, nameMap);
204                 
205                 if (m->control_pressed) { delete cluster; delete list; delete rabund; return 0; }
206                 
207                 list = cluster->getListVector();
208                 rabund = cluster->getRAbundVector();
209                                                 
210                 if (outputDir == "") { outputDir += m->hasPath(phylipfile); }
211                 fileroot = outputDir + m->getRootName(m->getSimpleName(phylipfile));
212                         
213                 m->openOutputFile(fileroot+ tag + ".sabund",    sabundFile);
214                 m->openOutputFile(fileroot+ tag + ".rabund",    rabundFile);
215                 m->openOutputFile(fileroot+ tag + ".list",              listFile);
216                                 
217                 outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
218                 outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
219                 outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
220                 
221                 float previousDist = 0.00000;
222                 float rndPreviousDist = 0.00000;
223                 oldRAbund = *rabund;
224                 oldList = *list;
225
226                 double saveCutoff = cutoff;
227                 
228                 int estart = time(NULL);
229         
230                 while (cluster->getSmallDist() < cutoff && cluster->getNSeqs() > 0){
231                 
232                         if (m->control_pressed) { delete cluster; delete list; delete rabund; sabundFile.close();rabundFile.close();listFile.close();  for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str());         } outputTypes.clear();  return 0;  }
233                 
234                         cluster->update(cutoff);
235         
236                         float dist = cluster->getSmallDist();
237                         float rndDist;
238                         if (hard) {
239                                 rndDist = m->ceilDist(dist, precision); 
240                         }else{
241                                 rndDist = m->roundDist(dist, precision); 
242                         }
243
244                         if(previousDist <= 0.0000 && dist != previousDist){
245                                 printData("unique");
246                         }
247                         else if(rndDist != rndPreviousDist){
248                                 printData(toString(rndPreviousDist,  length-1));
249                         }
250                 
251                         previousDist = dist;
252                         rndPreviousDist = rndDist;
253                         oldRAbund = *rabund;
254                         oldList = *list;
255                 }
256         
257                 if(previousDist <= 0.0000){
258                         printData("unique");
259                 }
260                 else if(rndPreviousDist<cutoff){
261                         printData(toString(rndPreviousDist, length-1));
262                 }
263                                         
264                 sabundFile.close();
265                 rabundFile.close();
266                 listFile.close();
267                 
268                 delete cluster; delete nameMap; delete list; delete rabund;
269         
270                 if (saveCutoff != cutoff) { 
271                         if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
272                         else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
273                         m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
274                 }
275                 
276                 m->mothurOutEndLine();
277                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
278                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
279                 m->mothurOutEndLine();
280
281                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
282
283                 return 0;
284         }
285         catch(exception& e) {
286                 m->errorOut(e, "ClusterDoturCommand", "execute");
287                 exit(1);
288         }
289 }
290
291 //**********************************************************************************************************************
292
293 void ClusterDoturCommand::printData(string label){
294         try {
295         
296                 oldRAbund.setLabel(label);
297                 oldRAbund.print(rabundFile);
298                 oldRAbund.getSAbundVector().print(sabundFile);
299                 
300                 oldRAbund.getSAbundVector().print(cout);
301                 
302                 oldList.setLabel(label);
303                 oldList.print(listFile);
304         }
305         catch(exception& e) {
306                 m->errorOut(e, "ClusterDoturCommand", "printData");
307                 exit(1);
308         }
309 }
310 //**********************************************************************************************************************