]> git.donarmstrong.com Git - mothur.git/blob - clustercommand.cpp
broke up globaldata and moved error checking and help into commands
[mothur.git] / clustercommand.cpp
1 /*
2  *  clustercommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "clustercommand.h"
11
12 //**********************************************************************************************************************
13 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
14 ClusterCommand::ClusterCommand(string option){
15         try{
16                 globaldata = GlobalData::getInstance();
17                 abort = false;
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"cutoff","precision","method"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         parser = new OptionParser();
28                         parser->parse(option, parameters);  delete parser;
29                         
30                         ValidParameters* validParameter = new ValidParameters();
31                 
32                         //check to make sure all parameters are valid for command
33                         for (it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter->isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                         
37                         //error checking to make sure they read a distance file
38                         if ((globaldata->gSparseMatrix == NULL) || (globaldata->gListVector == NULL)) {
39                                 cout << "Before you use the cluster command, you first need to read in a distance matrix." << endl;  abort = true;
40                         } 
41                 
42                         //check for optional parameter and set defaults
43                         // ...at some point should added some additional type checking...
44                         //get user cutoff and precision or use defaults
45                         string temp;
46                         temp = validParameter->validFile(parameters, "precision", false);               if (temp == "not found") { temp = "100"; }
47                         //saves precision legnth for formatting below
48                         length = temp.length();
49                         convert(temp, precision); 
50                         
51                         temp = validParameter->validFile(parameters, "cutoff", false);                  if (temp == "not found") { temp = "10"; }
52                         convert(temp, cutoff); 
53                         cutoff += (5 / (precision * 10.0));
54                         
55                         method = validParameter->validFile(parameters, "method", false);                        if (method == "not found") { method = "furthest"; }
56
57                         delete validParameter;
58                         
59                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
60                         else {cout << "Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average." << endl; abort = true; }
61
62                         
63                         if (abort == false) {
64                         
65                                 //get matrix, list and rabund for execute
66                                 if(globaldata->gSparseMatrix != NULL)   {       matrix = new SparseMatrix(*globaldata->gSparseMatrix);          }
67                         
68                                 if(globaldata->gListVector != NULL){
69                                         list = new ListVector(*globaldata->gListVector);
70                                         rabund = new RAbundVector(list->getRAbundVector());
71                                 }
72                                 
73                                 //create cluster
74                                 if(method == "furthest")        {       cluster = new CompleteLinkage(rabund, list, matrix);    tag = "fn";     }
75                                 else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix);              tag = "nn";     }
76                                 else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix);             tag = "an";     }
77                                 else                                            {       cout << "error - not recognized method" << endl;        abort = true;   }       
78                                 
79                                 fileroot = getRootName(globaldata->inputFileName);
80                         
81                                 openOutputFile(fileroot+ tag + ".sabund",       sabundFile);
82                                 openOutputFile(fileroot+ tag + ".rabund",       rabundFile);
83                                 openOutputFile(fileroot+ tag + ".list",         listFile);
84                                 
85                                 
86                         }
87
88                 }
89                 
90         }
91         catch(exception& e) {
92                 cout << "Standard Error: " << e.what() << " has occurred in the ClusterCommand class Function ClusterCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
93                 exit(1);
94         }
95         catch(...) {
96                 cout << "An unknown error has occurred in the ClusterCommand class function ClusterCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
97                 exit(1);
98         }
99 }
100
101 //**********************************************************************************************************************
102
103 void ClusterCommand::help(){
104         try {
105                 cout << "The cluster command can only be executed after a successful read.dist command." << "\n";
106                 cout << "The cluster command parameter options are method, cuttoff and precision. No parameters are required." << "\n";
107                 cout << "The cluster command should be in the following format: " << "\n";
108                 cout << "cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) " << "\n";
109                 cout << "The acceptable cluster methods are furthest, nearest and average.  If no method is provided then furthest is assumed." << "\n" << "\n";        
110         }
111         catch(exception& e) {
112                 cout << "Standard Error: " << e.what() << " has occurred in the ClusterCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
113                 exit(1);
114         }
115         catch(...) {
116                 cout << "An unknown error has occurred in the ClusterCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
117                 exit(1);
118         }       
119 }
120
121 //**********************************************************************************************************************
122
//Frees the four heap objects this command holds: the clustering object and the matrix, list and
//rabund it was constructed from.
//NOTE(review): delete is only safe on NULL or on a live allocation; confirm the constructor leaves
//all four pointers in one of those states on its help/abort paths, where nothing is allocated.
ClusterCommand::~ClusterCommand(){
        //NOTE(review): cluster is built from rabund, list and matrix, so it is presumably deleted
        //first on purpose; keep this order unless the Cluster destructors are confirmed independent.
        delete cluster;
        delete matrix;
        delete list;
        delete rabund;
}
129
130 //**********************************************************************************************************************
131
132 int ClusterCommand::execute(){
133         try {
134         
135                 if (abort == true) {    return 0;       }
136                 
137                 float previousDist = 0.00000;
138                 float rndPreviousDist = 0.00000;
139                 oldRAbund = *rabund;
140                 oldList = *list;
141                 
142                 float x;
143                 x=0.1;
144                 toString(x, 2);
145         
146                 while(matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
147                         cluster->update();
148                         float dist = matrix->getSmallDist();
149                         float rndDist = roundDist(dist, precision);
150
151                         if(previousDist <= 0.0000 && dist != previousDist){
152                                 printData("unique");
153                         }
154                         else if(rndDist != rndPreviousDist){
155                                 printData(toString(rndPreviousDist,  length-1));
156                         }
157                 
158                         previousDist = dist;
159                         rndPreviousDist = rndDist;
160                         oldRAbund = *rabund;
161                         oldList = *list;
162                 }
163         
164                 if(previousDist <= 0.0000){
165                         printData("unique");
166                 }
167                 else if(rndPreviousDist<cutoff){
168                         printData(toString(rndPreviousDist, length-1));
169                 }
170                 
171                 //delete globaldata's copy of the sparsematrix and listvector to free up memory
172                 delete globaldata->gSparseMatrix;  globaldata->gSparseMatrix = NULL;
173                 delete globaldata->gListVector;  globaldata->gListVector = NULL;
174                 
175                 //saves .list file so you can do the collect, rarefaction and summary commands without doing a read.list
176                 if (globaldata->getFormat() == "phylip") { globaldata->setPhylipFile(""); }
177                 else if (globaldata->getFormat() == "column") { globaldata->setColumnFile(""); }
178                 
179                 globaldata->setListFile(fileroot+ tag + ".list");
180                 globaldata->setNameFile("");
181                 globaldata->setFormat("list");
182                 
183                 return 0;
184         }
185         catch(exception& e) {
186                 cout << "Standard Error: " << e.what() << " has occurred in the ClusterCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
187                 exit(1);
188         }
189         catch(...) {
190                 cout << "An unknown error has occurred in the ClusterCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
191                 exit(1);
192         }
193
194 }
195
196 //**********************************************************************************************************************
197
198 void ClusterCommand::printData(string label){
199         try {
200                 oldRAbund.setLabel(label);
201                 oldRAbund.getSAbundVector().print(cout);
202                 oldRAbund.print(rabundFile);
203                 oldRAbund.getSAbundVector().print(sabundFile);
204         
205                 oldList.setLabel(label);
206                 oldList.print(listFile);
207         }
208         catch(exception& e) {
209                 cout << "Standard Error: " << e.what() << " has occurred in the ClusterCommand class Function printData. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
210                 exit(1);
211         }
212         catch(...) {
213                 cout << "An unknown error has occurred in the ClusterCommand class function printData. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
214                 exit(1);
215         }
216
217 }
218 //**********************************************************************************************************************