]> git.donarmstrong.com Git - mothur.git/blob - clustercommand.cpp
fixed some bugs
[mothur.git] / clustercommand.cpp
1 /*
2  *  clustercommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "clustercommand.h"
11
12 //**********************************************************************************************************************
13 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
14 ClusterCommand::ClusterCommand(string option){
15         try{
16                 globaldata = GlobalData::getInstance();
17                 abort = false;
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"cutoff","precision","method"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         OptionParser parser(option);
28                         map<string,string> parameters = parser.getParameters();
29                         
30                         ValidParameters validParameter;
31                 
32                         //check to make sure all parameters are valid for command
33                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                         
37                         //error checking to make sure they read a distance file
38                         if ((globaldata->gSparseMatrix == NULL) || (globaldata->gListVector == NULL)) {
39                                 cout << "Before you use the cluster command, you first need to read in a distance matrix." << endl;  abort = true;
40                         } 
41                 
42                         //check for optional parameter and set defaults
43                         // ...at some point should added some additional type checking...
44                         //get user cutoff and precision or use defaults
45                         string temp;
46                         temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
47                         //saves precision legnth for formatting below
48                         length = temp.length();
49                         convert(temp, precision); 
50                         
51                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10"; }
52                         convert(temp, cutoff); 
53                         cutoff += (5 / (precision * 10.0));
54                         
55                         method = validParameter.validFile(parameters, "method", false);                 if (method == "not found") { method = "furthest"; }
56
57                         
58                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
59                         else {cout << "Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average." << endl; abort = true; }
60
61                         
62                         if (abort == false) {
63                         
64                                 //get matrix, list and rabund for execute
65                                 if(globaldata->gSparseMatrix != NULL)   {       matrix = globaldata->gSparseMatrix;             }
66                         
67                                 if(globaldata->gListVector != NULL){
68                                         list = globaldata->gListVector;
69                                         rabund = new RAbundVector(list->getRAbundVector());
70                                 }
71                                 
72                                 //create cluster
73                                 if(method == "furthest")        {       cluster = new CompleteLinkage(rabund, list, matrix);    tag = "fn";     }
74                                 else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix);              tag = "nn";     }
75                                 else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix);             tag = "an";     }
76                                 else                                            {       cout << "error - not recognized method" << endl;        abort = true;   }       
77                                 
78                                 fileroot = getRootName(globaldata->inputFileName);
79                         
80                                 openOutputFile(fileroot+ tag + ".sabund",       sabundFile);
81                                 openOutputFile(fileroot+ tag + ".rabund",       rabundFile);
82                                 openOutputFile(fileroot+ tag + ".list",         listFile);
83                                 
84                                 
85                         }
86
87                 }
88                 
89         }
90         catch(exception& e) {
91                 cout << "Standard Error: " << e.what() << " has occurred in the ClusterCommand class Function ClusterCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
92                 exit(1);
93         }
94         catch(...) {
95                 cout << "An unknown error has occurred in the ClusterCommand class function ClusterCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
96                 exit(1);
97         }
98 }
99
100 //**********************************************************************************************************************
101
102 void ClusterCommand::help(){
103         try {
104                 cout << "The cluster command can only be executed after a successful read.dist command." << "\n";
105                 cout << "The cluster command parameter options are method, cuttoff and precision. No parameters are required." << "\n";
106                 cout << "The cluster command should be in the following format: " << "\n";
107                 cout << "cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) " << "\n";
108                 cout << "The acceptable cluster methods are furthest, nearest and average.  If no method is provided then furthest is assumed." << "\n" << "\n";        
109         }
110         catch(exception& e) {
111                 cout << "Standard Error: " << e.what() << " has occurred in the ClusterCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
112                 exit(1);
113         }
114         catch(...) {
115                 cout << "An unknown error has occurred in the ClusterCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
116                 exit(1);
117         }       
118 }
119
120 //**********************************************************************************************************************
121
122 ClusterCommand::~ClusterCommand(){
123         if (abort == false) {
124                 delete cluster;
125                 delete rabund;
126         }
127 }
128
129 //**********************************************************************************************************************
130
131 int ClusterCommand::execute(){
132         try {
133         
134                 if (abort == true) {    return 0;       }
135                 
136                 float previousDist = 0.00000;
137                 float rndPreviousDist = 0.00000;
138                 oldRAbund = *rabund;
139                 oldList = *list;
140                 
141                 float x;
142                 x=0.1;
143                 toString(x, 2);
144         
145                 while(matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
146                         cluster->update();
147                         float dist = matrix->getSmallDist();
148                         float rndDist = roundDist(dist, precision);
149
150                         if(previousDist <= 0.0000 && dist != previousDist){
151                                 printData("unique");
152                         }
153                         else if(rndDist != rndPreviousDist){
154                                 printData(toString(rndPreviousDist,  length-1));
155                         }
156                 
157                         previousDist = dist;
158                         rndPreviousDist = rndDist;
159                         oldRAbund = *rabund;
160                         oldList = *list;
161                 }
162         
163                 if(previousDist <= 0.0000){
164                         printData("unique");
165                 }
166                 else if(rndPreviousDist<cutoff){
167                         printData(toString(rndPreviousDist, length-1));
168                 }
169                 
170                 //delete globaldata's copy of the sparsematrix and listvector to free up memory
171                 delete globaldata->gSparseMatrix;  globaldata->gSparseMatrix = NULL;
172                 delete globaldata->gListVector;  globaldata->gListVector = NULL;
173                 
174                 //saves .list file so you can do the collect, rarefaction and summary commands without doing a read.list
175                 if (globaldata->getFormat() == "phylip") { globaldata->setPhylipFile(""); }
176                 else if (globaldata->getFormat() == "column") { globaldata->setColumnFile(""); }
177                 
178                 globaldata->setListFile(fileroot+ tag + ".list");
179                 globaldata->setNameFile("");
180                 globaldata->setFormat("list");
181                 
182                 sabundFile.close();
183                 rabundFile.close();
184                 listFile.close();
185                 
186                 return 0;
187         }
188         catch(exception& e) {
189                 cout << "Standard Error: " << e.what() << " has occurred in the ClusterCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
190                 exit(1);
191         }
192         catch(...) {
193                 cout << "An unknown error has occurred in the ClusterCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
194                 exit(1);
195         }
196
197 }
198
199 //**********************************************************************************************************************
200
201 void ClusterCommand::printData(string label){
202         try {
203                 oldRAbund.setLabel(label);
204                 oldRAbund.getSAbundVector().print(cout);
205                 oldRAbund.print(rabundFile);
206                 oldRAbund.getSAbundVector().print(sabundFile);
207         
208                 oldList.setLabel(label);
209                 oldList.print(listFile);
210         }
211         catch(exception& e) {
212                 cout << "Standard Error: " << e.what() << " has occurred in the ClusterCommand class Function printData. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
213                 exit(1);
214         }
215         catch(...) {
216                 cout << "An unknown error has occurred in the ClusterCommand class function printData. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
217                 exit(1);
218         }
219
220 }
221 //**********************************************************************************************************************