]> git.donarmstrong.com Git - mothur.git/blob - matrixoutputcommand.cpp
broke up globaldata and moved error checking and help into commands
[mothur.git] / matrixoutputcommand.cpp
1 /*
2  *  matrixoutputcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/20/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "matrixoutputcommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
21
22
23 //**********************************************************************************************************************
24
25 MatrixOutputCommand::MatrixOutputCommand(string option){
26         try {
27                 globaldata = GlobalData::getInstance();
28                 abort = false;
29                 allLines = 1;
30                 lines.clear();
31                 labels.clear();
32                 Groups.clear();
33                 Estimators.clear();
34                 
35                 //allow user to run help
36                 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
37                 
38                 else {
39                         //valid paramters for this command
40                         string Array[] =  {"line","label","calc","groups"};
41                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
42                         
43                         parser = new OptionParser();
44                         parser->parse(option, parameters);  delete parser;
45                         
46                         ValidParameters* validParameter = new ValidParameters();
47                 
48                         //check to make sure all parameters are valid for command
49                         for (it = parameters.begin(); it != parameters.end(); it++) { 
50                                 if (validParameter->isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
51                         }
52                         
53                         //make sure the user has already run the read.otu command
54                         if (globaldata->getSharedFile() == "") {
55                                 if (globaldata->getListFile() == "") { cout << "You must read a list and a group, or a shared before you can use the dist.shared command." << endl; abort = true; }
56                                 else if (globaldata->getGroupFile() == "") { cout << "You must read a list and a group, or a shared before you can use the dist.shared command." << endl; abort = true; }
57                         }
58                         
59                         //check for optional parameter and set defaults
60                         // ...at some point should added some additional type checking...
61                         line = validParameter->validFile(parameters, "line", false);                            
62                         if (line == "not found") { line = "";  }
63                         else { 
64                                 if(line != "all") {  splitAtDash(line, lines);  allLines = 0;  }
65                                 else { allLines = 1;  }
66                         }
67                         
68                         label = validParameter->validFile(parameters, "label", false);                  
69                         if (label == "not found") { label = ""; }
70                         else { 
71                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
72                                 else { allLines = 1;  }
73                         }
74                         
75                         //make sure user did not use both the line and label parameters
76                         if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. " << endl; abort = true; }
77                         //if the user has not specified any line or labels use the ones from read.otu
78                         else if((line == "") && (label == "")) {  
79                                 allLines = globaldata->allLines; 
80                                 labels = globaldata->labels; 
81                                 lines = globaldata->lines;
82                         }
83                                 
84                         groups = validParameter->validFile(parameters, "groups", false);                        
85                         if (groups == "not found") { groups = ""; }
86                         else { 
87                                 splitAtDash(groups, Groups);
88                                 globaldata->Groups = Groups;
89                         }
90                                 
91                         calc = validParameter->validFile(parameters, "calc", false);                    
92                         if (calc == "not found") { calc = "jclass-thetayc";  }
93                         else { 
94                                  if (calc == "default")  {  calc = "jclass-thetayc";  }
95                         }
96                         splitAtDash(calc, Estimators);
97
98                         delete validParameter;
99                         
100                         
101                         if (abort == false) {
102                         
103                                 validCalculator = new ValidCalculators();
104                                 
105                                 int i;
106                                 for (i=0; i<Estimators.size(); i++) {
107                                         if (validCalculator->isValidCalculator("matrix", Estimators[i]) == true) { 
108                                                 if (Estimators[i] == "jabund") {        
109                                                         matrixCalculators.push_back(new JAbund());
110                                                 }else if (Estimators[i] == "sorabund") { 
111                                                         matrixCalculators.push_back(new SorAbund());
112                                                 }else if (Estimators[i] == "jclass") { 
113                                                         matrixCalculators.push_back(new Jclass());
114                                                 }else if (Estimators[i] == "sorclass") { 
115                                                         matrixCalculators.push_back(new SorClass());
116                                                 }else if (Estimators[i] == "jest") { 
117                                                         matrixCalculators.push_back(new Jest());
118                                                 }else if (Estimators[i] == "sorest") { 
119                                                         matrixCalculators.push_back(new SorEst());
120                                                 }else if (Estimators[i] == "thetayc") { 
121                                                         matrixCalculators.push_back(new ThetaYC());
122                                                 }else if (Estimators[i] == "thetan") { 
123                                                         matrixCalculators.push_back(new ThetaN());
124                                                 }else if (Estimators[i] == "morisitahorn") { 
125                                                         matrixCalculators.push_back(new MorHorn());
126                                                 }else if (Estimators[i] == "braycurtis") { 
127                                                         matrixCalculators.push_back(new BrayCurtis());
128                                                 }
129                                         }
130                                 }
131                                 
132                         }
133                 }
134                 
135         }
136         catch(exception& e) {
137                 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function MatrixOutputCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
138                 exit(1);
139         }
140         catch(...) {
141                 cout << "An unknown error has occurred in the MatrixOutputCommand class function MatrixOutputCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
142                 exit(1);
143         }       
144 }
145
146 //**********************************************************************************************************************
147
148 void MatrixOutputCommand::help(){
149         try {
150                 cout << "The dist.shared command can only be executed after a successful read.otu command." << "\n";
151                 cout << "The dist.shared command parameters are groups, calc, line and label.  The calc parameter is required, and you may not use line and label at the same time." << "\n";
152                 cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included used." << "\n";
153                 cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like distance matrices created for, and are also separated by dashes." << "\n";
154                 cout << "The dist.shared command should be in the following format: dist.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels)." << "\n";
155                 cout << "Example dist.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund)." << "\n";
156                 cout << "The default value for groups is all the groups in your groupfile." << "\n";
157                 cout << "The default value for calc is jclass and thetayc." << "\n";
158                 validCalculator->printCalc("matrix", cout);
159                 cout << "The dist.shared command outputs a .dist file for each calculator you specify at each distance you choose." << "\n";
160                 cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n";
161         }
162         catch(exception& e) {
163                 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
164                 exit(1);
165         }
166         catch(...) {
167                 cout << "An unknown error has occurred in the MatrixOutputCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
168                 exit(1);
169         }       
170 }
171
172
173 //**********************************************************************************************************************
174
175 MatrixOutputCommand::~MatrixOutputCommand(){
176         delete input;
177         delete read;
178         delete validCalculator;
179 }
180
181 //**********************************************************************************************************************
182
183 int MatrixOutputCommand::execute(){
184         try {
185                 
186                 if (abort == true) {    return 0;       }
187         
188                 int count = 1;  
189                                 
190                 //if the users entered no valid calculators don't execute command
191                 if (matrixCalculators.size() == 0) { cout << "No valid calculators." << endl; return 0; }
192
193                 //you have groups
194                 read = new ReadOTUFile(globaldata->inputFileName);      
195                 read->read(&*globaldata); 
196                         
197                 input = globaldata->ginput;
198                 lookup = input->getSharedRAbundVectors();
199                 vector<SharedRAbundVector*> lastLookup = lookup;
200                 
201                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
202                 set<string> processedLabels;
203                 set<string> userLabels = labels;
204                 set<int> userLines = lines;
205                                 
206                 if (lookup.size() < 2) { cout << "You have not provided enough valid groups.  I cannot run the command." << endl; return 0;}
207                 
208                 numGroups = lookup.size();
209                                 
210                 //as long as you are not at the end of the file or done wih the lines you want
211                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
212                 
213                         if(allLines == 1 || lines.count(count) == 1 || labels.count(lookup[0]->getLabel()) == 1){                       
214                                 cout << lookup[0]->getLabel() << '\t' << count << endl;
215                                 process(lookup);
216                                 
217                                 processedLabels.insert(lookup[0]->getLabel());
218                                 userLabels.erase(lookup[0]->getLabel());
219                                 userLines.erase(count);
220                         }
221                         
222                         if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLookup[0]->getLabel()) != 1)) {
223                                 cout << lastLookup[0]->getLabel() << '\t' << count << endl;
224                                 process(lastLookup);
225                                 
226                                 processedLabels.insert(lastLookup[0]->getLabel());
227                                 userLabels.erase(lastLookup[0]->getLabel());
228                         }
229
230                         //prevent memory leak
231                         if (count != 1) { for (int i = 0; i < lastLookup.size(); i++) {  delete lastLookup[i];  } }
232                         lastLookup = lookup;                    
233                         
234                         //get next line to process
235                         lookup = input->getSharedRAbundVectors();
236                         count++;
237                 }
238                 
239                 //output error messages about any remaining user labels
240                 set<string>::iterator it;
241                 bool needToRun = false;
242                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
243                         cout << "Your file does not include the label "<< *it; 
244                         if (processedLabels.count(lastLookup[0]->getLabel()) != 1) {
245                                 cout << ". I will use " << lastLookup[0]->getLabel() << "." << endl;
246                                 needToRun = true;
247                         }else {
248                                 cout << ". Please refer to " << lastLookup[0]->getLabel() << "." << endl;
249                         }
250                 }
251                 
252                 //run last line if you need to
253                 if (needToRun == true)  {
254                         cout << lastLookup[0]->getLabel() << '\t' << count << endl;
255                         process(lastLookup);            
256                 }
257                 
258                 for (int i = 0; i < lastLookup.size(); i++) {  delete lastLookup[i];  }
259
260                 
261                 //reset groups parameter
262                 globaldata->Groups.clear();  
263
264                 return 0;
265         }
266         catch(exception& e) {
267                 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
268                 exit(1);
269         }
270         catch(...) {
271                 cout << "An unknown error has occurred in the MatrixOutputCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
272                 exit(1);
273         }               
274 }
275 /***********************************************************/
276 void MatrixOutputCommand::printSims(ostream& out) {
277         try {
278                 
279                 //output column headers
280                 out << simMatrix.size() << endl;
281                 
282                 for (int m = 0; m < simMatrix.size(); m++)      {
283                         out << lookup[m]->getGroup() << '\t';
284                         for (int n = 0; n < m; n++)     {
285                                 out << simMatrix[m][n] << '\t'; 
286                         }
287                         out << endl;
288                 }
289
290         }
291         catch(exception& e) {
292                 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
293                 exit(1);
294         }
295         catch(...) {
296                 cout << "An unknown error has occurred in the MatrixOutputCommand class function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
297                 exit(1);
298         }               
299 }
300 /***********************************************************/
301 void MatrixOutputCommand::process(vector<SharedRAbundVector*> thisLookup){
302         try {
303         
304                                 EstOutput data;
305                                 vector<SharedRAbundVector*> subset;
306
307                                 //for each calculator                                                                                           
308                                 for(int i = 0 ; i < matrixCalculators.size(); i++) {
309                                         
310                                         //initialize simMatrix
311                                         simMatrix.clear();
312                                         simMatrix.resize(numGroups);
313                                         for (int m = 0; m < simMatrix.size(); m++)      {
314                                                 for (int j = 0; j < simMatrix.size(); j++)      {
315                                                         simMatrix[m].push_back(0.0);
316                                                 }
317                                         }
318                                 
319                                         for (int k = 0; k < thisLookup.size(); k++) { 
320                                                 for (int l = k; l < thisLookup.size(); l++) {
321                                                         if (k != l) { //we dont need to similiarity of a groups to itself
322                                                                 //get estimated similarity between 2 groups
323                                                                 
324                                                                 subset.clear(); //clear out old pair of sharedrabunds
325                                                                 //add new pair of sharedrabunds
326                                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
327                                                                 
328                                                                 data = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
329                                                                 //save values in similarity matrix
330                                                                 simMatrix[k][l] = 1.0 - data[0];  //convert similiarity to distance
331                                                                 simMatrix[l][k] = 1.0 - data[0];  //convert similiarity to distance
332                                                         }
333                                                 }
334                                         }
335                                         
336                                         exportFileName = getRootName(globaldata->inputFileName) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
337                                         openOutputFile(exportFileName, out);
338                                         printSims(out);
339                                         out.close();
340                                         
341                                 }
342
343         
344                 
345         }
346         catch(exception& e) {
347                 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
348                 exit(1);
349         }
350         catch(...) {
351                 cout << "An unknown error has occurred in the MatrixOutputCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
352                 exit(1);
353         }               
354 }
355 /***********************************************************/
356
357
358