]> git.donarmstrong.com Git - mothur.git/blob - matrixoutputcommand.cpp
added hcluster command and fixed some bugs, namely one with smart distancing.
[mothur.git] / matrixoutputcommand.cpp
1 /*
2  *  matrixoutputcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/20/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "matrixoutputcommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
21
22
23 //**********************************************************************************************************************
24
25 MatrixOutputCommand::MatrixOutputCommand(string option){
26         try {
27                 globaldata = GlobalData::getInstance();
28                 abort = false;
29                 allLines = 1;
30                 labels.clear();
31                 Groups.clear();
32                 Estimators.clear();
33                 
34                 //allow user to run help
35                 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
36                 
37                 else {
38                         //valid paramters for this command
39                         string Array[] =  {"label","calc","groups"};
40                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
41                         
42                         OptionParser parser(option);
43                         map<string,string> parameters  = parser.getParameters();
44                         
45                         ValidParameters validParameter;
46                 
47                         //check to make sure all parameters are valid for command
48                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
49                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
50                         }
51                         
52                         //make sure the user has already run the read.otu command
53                         if (globaldata->getSharedFile() == "") {
54                                 if (globaldata->getListFile() == "") { mothurOut("You must read a list and a group, or a shared before you can use the dist.shared command."); mothurOutEndLine(); abort = true; }
55                                 else if (globaldata->getGroupFile() == "") { mothurOut("You must read a list and a group, or a shared before you can use the dist.shared command."); mothurOutEndLine(); abort = true; }
56                         }
57                         
58                         //check for optional parameter and set defaults
59                         // ...at some point should added some additional type checking...
60                         label = validParameter.validFile(parameters, "label", false);                   
61                         if (label == "not found") { label = ""; }
62                         else { 
63                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
64                                 else { allLines = 1;  }
65                         }
66                         
67                         //if the user has not specified any labels use the ones from read.otu
68                         if (label == "") {  
69                                 allLines = globaldata->allLines; 
70                                 labels = globaldata->labels; 
71                         }
72                                 
73                         groups = validParameter.validFile(parameters, "groups", false);                 
74                         if (groups == "not found") { groups = ""; }
75                         else { 
76                                 splitAtDash(groups, Groups);
77                                 globaldata->Groups = Groups;
78                         }
79                                 
80                         calc = validParameter.validFile(parameters, "calc", false);                     
81                         if (calc == "not found") { calc = "jclass-thetayc";  }
82                         else { 
83                                  if (calc == "default")  {  calc = "jclass-thetayc";  }
84                         }
85                         splitAtDash(calc, Estimators);
86
87                         if (abort == false) {
88                         
89                                 validCalculator = new ValidCalculators();
90                                 
91                                 int i;
92                                 for (i=0; i<Estimators.size(); i++) {
93                                         if (validCalculator->isValidCalculator("matrix", Estimators[i]) == true) { 
94                                                 if (Estimators[i] == "jabund") {        
95                                                         matrixCalculators.push_back(new JAbund());
96                                                 }else if (Estimators[i] == "sorabund") { 
97                                                         matrixCalculators.push_back(new SorAbund());
98                                                 }else if (Estimators[i] == "jclass") { 
99                                                         matrixCalculators.push_back(new Jclass());
100                                                 }else if (Estimators[i] == "sorclass") { 
101                                                         matrixCalculators.push_back(new SorClass());
102                                                 }else if (Estimators[i] == "jest") { 
103                                                         matrixCalculators.push_back(new Jest());
104                                                 }else if (Estimators[i] == "sorest") { 
105                                                         matrixCalculators.push_back(new SorEst());
106                                                 }else if (Estimators[i] == "thetayc") { 
107                                                         matrixCalculators.push_back(new ThetaYC());
108                                                 }else if (Estimators[i] == "thetan") { 
109                                                         matrixCalculators.push_back(new ThetaN());
110                                                 }else if (Estimators[i] == "morisitahorn") { 
111                                                         matrixCalculators.push_back(new MorHorn());
112                                                 }else if (Estimators[i] == "braycurtis") { 
113                                                         matrixCalculators.push_back(new BrayCurtis());
114                                                 }
115                                         }
116                                 }
117                                 
118                         }
119                 }
120                 
121         }
122         catch(exception& e) {
123                 errorOut(e, "MatrixOutputCommand", "MatrixOutputCommand");
124                 exit(1);
125         }
126 }
127
128 //**********************************************************************************************************************
129
130 void MatrixOutputCommand::help(){
131         try {
132                 mothurOut("The dist.shared command can only be executed after a successful read.otu command.\n");
133                 mothurOut("The dist.shared command parameters are groups, calc and label.  No parameters are required.\n");
134                 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n");
135                 mothurOut("The group names are separated by dashes. The label parameter allows you to select what distance levels you would like distance matrices created for, and is also separated by dashes.\n");
136                 mothurOut("The dist.shared command should be in the following format: dist.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels).\n");
137                 mothurOut("Example dist.shared(groups=A-B-C, calc=jabund-sorabund).\n");
138                 mothurOut("The default value for groups is all the groups in your groupfile.\n");
139                 mothurOut("The default value for calc is jclass and thetayc.\n");
140                 validCalculator->printCalc("matrix", cout);
141                 mothurOut("The dist.shared command outputs a .dist file for each calculator you specify at each distance you choose.\n");
142                 mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
143         }
144         catch(exception& e) {
145                 errorOut(e, "MatrixOutputCommand", "help");
146                 exit(1);
147         }
148 }
149
150
151 //**********************************************************************************************************************
152
153 MatrixOutputCommand::~MatrixOutputCommand(){
154         if (abort == false) {
155                 delete input; globaldata->ginput = NULL;
156                 delete read;
157                 delete validCalculator;
158         }
159 }
160
161 //**********************************************************************************************************************
162
163 int MatrixOutputCommand::execute(){
164         try {
165                 
166                 if (abort == true) {    return 0;       }
167                         
168                 //if the users entered no valid calculators don't execute command
169                 if (matrixCalculators.size() == 0) { mothurOut("No valid calculators."); mothurOutEndLine();  return 0; }
170
171                 //you have groups
172                 read = new ReadOTUFile(globaldata->inputFileName);      
173                 read->read(&*globaldata); 
174                         
175                 input = globaldata->ginput;
176                 lookup = input->getSharedRAbundVectors();
177                 string lastLabel = lookup[0]->getLabel();
178                 
179                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
180                 set<string> processedLabels;
181                 set<string> userLabels = labels;
182                                         
183                 if (lookup.size() < 2) { mothurOut("You have not provided enough valid groups.  I cannot run the command."); mothurOutEndLine(); return 0;}
184                 
185                 numGroups = lookup.size();
186                                 
187                 //as long as you are not at the end of the file or done wih the lines you want
188                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
189                 
190                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
191                                 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
192                                 process(lookup);
193                                 
194                                 processedLabels.insert(lookup[0]->getLabel());
195                                 userLabels.erase(lookup[0]->getLabel());
196                         }
197                         
198                         if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
199                                 string saveLabel = lookup[0]->getLabel();
200                                 
201                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
202                                 lookup = input->getSharedRAbundVectors(lastLabel);
203
204                                 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
205                                 process(lookup);
206                                 
207                                 processedLabels.insert(lookup[0]->getLabel());
208                                 userLabels.erase(lookup[0]->getLabel());
209                                 
210                                 //restore real lastlabel to save below
211                                 lookup[0]->setLabel(saveLabel);
212                         }
213
214                         lastLabel = lookup[0]->getLabel();                      
215                         
216                         //get next line to process
217                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
218                         lookup = input->getSharedRAbundVectors();
219                 }
220                 
221                 //output error messages about any remaining user labels
222                 set<string>::iterator it;
223                 bool needToRun = false;
224                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
225                         mothurOut("Your file does not include the label " + *it);  
226                         if (processedLabels.count(lastLabel) != 1) {
227                                 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
228                                 needToRun = true;
229                         }else {
230                                 mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
231                         }
232                 }
233                 
234                 //run last label if you need to
235                 if (needToRun == true)  {
236                         for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {  delete lookup[i]; }  } 
237                         lookup = input->getSharedRAbundVectors(lastLabel);
238
239                         mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
240                         process(lookup);
241                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
242                 }
243                 
244                                 
245                 //reset groups parameter
246                 globaldata->Groups.clear();  
247
248                 return 0;
249         }
250         catch(exception& e) {
251                 errorOut(e, "MatrixOutputCommand", "execute");
252                 exit(1);
253         }
254 }
255 /***********************************************************/
256 void MatrixOutputCommand::printSims(ostream& out) {
257         try {
258                 
259                 //output column headers
260                 out << simMatrix.size() << endl;
261                 
262                 for (int m = 0; m < simMatrix.size(); m++)      {
263                         out << lookup[m]->getGroup() << '\t';
264                         for (int n = 0; n < m; n++)     {
265                                 out << simMatrix[m][n] << '\t'; 
266                         }
267                         out << endl;
268                 }
269
270         }
271         catch(exception& e) {
272                 errorOut(e, "MatrixOutputCommand", "printSims");
273                 exit(1);
274         }
275 }
276 /***********************************************************/
277 void MatrixOutputCommand::process(vector<SharedRAbundVector*> thisLookup){
278         try {
279         
280                                 EstOutput data;
281                                 vector<SharedRAbundVector*> subset;
282
283                                 //for each calculator                                                                                           
284                                 for(int i = 0 ; i < matrixCalculators.size(); i++) {
285                                         
286                                         //initialize simMatrix
287                                         simMatrix.clear();
288                                         simMatrix.resize(numGroups);
289                                         for (int m = 0; m < simMatrix.size(); m++)      {
290                                                 for (int j = 0; j < simMatrix.size(); j++)      {
291                                                         simMatrix[m].push_back(0.0);
292                                                 }
293                                         }
294                                 
295                                         for (int k = 0; k < thisLookup.size(); k++) { 
296                                                 for (int l = k; l < thisLookup.size(); l++) {
297                                                         if (k != l) { //we dont need to similiarity of a groups to itself
298                                                                 //get estimated similarity between 2 groups
299                                                                 
300                                                                 subset.clear(); //clear out old pair of sharedrabunds
301                                                                 //add new pair of sharedrabunds
302                                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
303                                                                 
304                                                                 data = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
305                                                                 //save values in similarity matrix
306                                                                 simMatrix[k][l] = 1.0 - data[0];  //convert similiarity to distance
307                                                                 simMatrix[l][k] = 1.0 - data[0];  //convert similiarity to distance
308                                                         }
309                                                 }
310                                         }
311                                         
312                                         exportFileName = getRootName(globaldata->inputFileName) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
313                                         openOutputFile(exportFileName, out);
314                                         printSims(out);
315                                         out.close();
316                                         
317                                 }
318
319         
320                 
321         }
322         catch(exception& e) {
323                 errorOut(e, "MatrixOutputCommand", "process");
324                 exit(1);
325         }
326 }
327 /***********************************************************/
328
329
330