]> git.donarmstrong.com Git - mothur.git/blob - matrixoutputcommand.cpp
changed how we do "smart" distancing
[mothur.git] / matrixoutputcommand.cpp
1 /*
2  *  matrixoutputcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/20/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "matrixoutputcommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
21
22
23 //**********************************************************************************************************************
24
25 MatrixOutputCommand::MatrixOutputCommand(string option){
26         try {
27                 globaldata = GlobalData::getInstance();
28                 abort = false;
29                 allLines = 1;
30                 lines.clear();
31                 labels.clear();
32                 Groups.clear();
33                 Estimators.clear();
34                 
35                 //allow user to run help
36                 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
37                 
38                 else {
39                         //valid paramters for this command
40                         string Array[] =  {"line","label","calc","groups"};
41                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
42                         
43                         OptionParser parser(option);
44                         map<string,string> parameters  = parser.getParameters();
45                         
46                         ValidParameters validParameter;
47                 
48                         //check to make sure all parameters are valid for command
49                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
50                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
51                         }
52                         
53                         //make sure the user has already run the read.otu command
54                         if (globaldata->getSharedFile() == "") {
55                                 if (globaldata->getListFile() == "") { cout << "You must read a list and a group, or a shared before you can use the dist.shared command." << endl; abort = true; }
56                                 else if (globaldata->getGroupFile() == "") { cout << "You must read a list and a group, or a shared before you can use the dist.shared command." << endl; abort = true; }
57                         }
58                         
59                         //check for optional parameter and set defaults
60                         // ...at some point should added some additional type checking...
61                         line = validParameter.validFile(parameters, "line", false);                             
62                         if (line == "not found") { line = "";  }
63                         else { 
64                                 if(line != "all") {  splitAtDash(line, lines);  allLines = 0;  }
65                                 else { allLines = 1;  }
66                         }
67                         
68                         label = validParameter.validFile(parameters, "label", false);                   
69                         if (label == "not found") { label = ""; }
70                         else { 
71                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
72                                 else { allLines = 1;  }
73                         }
74                         
75                         //make sure user did not use both the line and label parameters
76                         if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. " << endl; abort = true; }
77                         //if the user has not specified any line or labels use the ones from read.otu
78                         else if((line == "") && (label == "")) {  
79                                 allLines = globaldata->allLines; 
80                                 labels = globaldata->labels; 
81                                 lines = globaldata->lines;
82                         }
83                                 
84                         groups = validParameter.validFile(parameters, "groups", false);                 
85                         if (groups == "not found") { groups = ""; }
86                         else { 
87                                 splitAtDash(groups, Groups);
88                                 globaldata->Groups = Groups;
89                         }
90                                 
91                         calc = validParameter.validFile(parameters, "calc", false);                     
92                         if (calc == "not found") { calc = "jclass-thetayc";  }
93                         else { 
94                                  if (calc == "default")  {  calc = "jclass-thetayc";  }
95                         }
96                         splitAtDash(calc, Estimators);
97
98                         if (abort == false) {
99                         
100                                 validCalculator = new ValidCalculators();
101                                 
102                                 int i;
103                                 for (i=0; i<Estimators.size(); i++) {
104                                         if (validCalculator->isValidCalculator("matrix", Estimators[i]) == true) { 
105                                                 if (Estimators[i] == "jabund") {        
106                                                         matrixCalculators.push_back(new JAbund());
107                                                 }else if (Estimators[i] == "sorabund") { 
108                                                         matrixCalculators.push_back(new SorAbund());
109                                                 }else if (Estimators[i] == "jclass") { 
110                                                         matrixCalculators.push_back(new Jclass());
111                                                 }else if (Estimators[i] == "sorclass") { 
112                                                         matrixCalculators.push_back(new SorClass());
113                                                 }else if (Estimators[i] == "jest") { 
114                                                         matrixCalculators.push_back(new Jest());
115                                                 }else if (Estimators[i] == "sorest") { 
116                                                         matrixCalculators.push_back(new SorEst());
117                                                 }else if (Estimators[i] == "thetayc") { 
118                                                         matrixCalculators.push_back(new ThetaYC());
119                                                 }else if (Estimators[i] == "thetan") { 
120                                                         matrixCalculators.push_back(new ThetaN());
121                                                 }else if (Estimators[i] == "morisitahorn") { 
122                                                         matrixCalculators.push_back(new MorHorn());
123                                                 }else if (Estimators[i] == "braycurtis") { 
124                                                         matrixCalculators.push_back(new BrayCurtis());
125                                                 }
126                                         }
127                                 }
128                                 
129                         }
130                 }
131                 
132         }
133         catch(exception& e) {
134                 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function MatrixOutputCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
135                 exit(1);
136         }
137         catch(...) {
138                 cout << "An unknown error has occurred in the MatrixOutputCommand class function MatrixOutputCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
139                 exit(1);
140         }       
141 }
142
143 //**********************************************************************************************************************
144
145 void MatrixOutputCommand::help(){
146         try {
147                 cout << "The dist.shared command can only be executed after a successful read.otu command." << "\n";
148                 cout << "The dist.shared command parameters are groups, calc, line and label.  The calc parameter is required, and you may not use line and label at the same time." << "\n";
149                 cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included used." << "\n";
150                 cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like distance matrices created for, and are also separated by dashes." << "\n";
151                 cout << "The dist.shared command should be in the following format: dist.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels)." << "\n";
152                 cout << "Example dist.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund)." << "\n";
153                 cout << "The default value for groups is all the groups in your groupfile." << "\n";
154                 cout << "The default value for calc is jclass and thetayc." << "\n";
155                 validCalculator->printCalc("matrix", cout);
156                 cout << "The dist.shared command outputs a .dist file for each calculator you specify at each distance you choose." << "\n";
157                 cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n";
158         }
159         catch(exception& e) {
160                 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
161                 exit(1);
162         }
163         catch(...) {
164                 cout << "An unknown error has occurred in the MatrixOutputCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
165                 exit(1);
166         }       
167 }
168
169
170 //**********************************************************************************************************************
171
172 MatrixOutputCommand::~MatrixOutputCommand(){
173         if (abort == false) {
174                 delete input; globaldata->ginput = NULL;
175                 delete read;
176                 delete validCalculator;
177         }
178 }
179
180 //**********************************************************************************************************************
181
182 int MatrixOutputCommand::execute(){
183         try {
184                 
185                 if (abort == true) {    return 0;       }
186         
187                 int count = 1;  
188                                 
189                 //if the users entered no valid calculators don't execute command
190                 if (matrixCalculators.size() == 0) { cout << "No valid calculators." << endl; return 0; }
191
192                 //you have groups
193                 read = new ReadOTUFile(globaldata->inputFileName);      
194                 read->read(&*globaldata); 
195                         
196                 input = globaldata->ginput;
197                 lookup = input->getSharedRAbundVectors();
198                 string lastLabel = lookup[0]->getLabel();
199                 
200                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
201                 set<string> processedLabels;
202                 set<string> userLabels = labels;
203                 set<int> userLines = lines;
204                                 
205                 if (lookup.size() < 2) { cout << "You have not provided enough valid groups.  I cannot run the command." << endl; return 0;}
206                 
207                 numGroups = lookup.size();
208                                 
209                 //as long as you are not at the end of the file or done wih the lines you want
210                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
211                 
212                         if(allLines == 1 || lines.count(count) == 1 || labels.count(lookup[0]->getLabel()) == 1){                       
213                                 cout << lookup[0]->getLabel() << '\t' << count << endl;
214                                 process(lookup);
215                                 
216                                 processedLabels.insert(lookup[0]->getLabel());
217                                 userLabels.erase(lookup[0]->getLabel());
218                                 userLines.erase(count);
219                         }
220                         
221                         if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
222                         
223                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
224                                 lookup = input->getSharedRAbundVectors(lastLabel);
225
226                                 cout << lookup[0]->getLabel() << '\t' << count << endl;
227                                 process(lookup);
228                                 
229                                 processedLabels.insert(lookup[0]->getLabel());
230                                 userLabels.erase(lookup[0]->getLabel());
231                         }
232
233                          
234                         lastLabel = lookup[0]->getLabel();                      
235                         
236                         //get next line to process
237                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
238                         lookup = input->getSharedRAbundVectors();
239                         count++;
240                 }
241                 
242                 //output error messages about any remaining user labels
243                 set<string>::iterator it;
244                 bool needToRun = false;
245                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
246                         cout << "Your file does not include the label "<< *it; 
247                         if (processedLabels.count(lastLabel) != 1) {
248                                 cout << ". I will use " << lastLabel << "." << endl;
249                                 needToRun = true;
250                         }else {
251                                 cout << ". Please refer to " << lastLabel << "." << endl;
252                         }
253                 }
254                 
255                 //run last line if you need to
256                 if (needToRun == true)  {
257                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
258                         lookup = input->getSharedRAbundVectors(lastLabel);
259
260                         cout << lookup[0]->getLabel() << '\t' << count << endl;
261                         process(lookup);
262                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
263                 }
264                 
265                                 
266                 //reset groups parameter
267                 globaldata->Groups.clear();  
268
269                 return 0;
270         }
271         catch(exception& e) {
272                 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
273                 exit(1);
274         }
275         catch(...) {
276                 cout << "An unknown error has occurred in the MatrixOutputCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
277                 exit(1);
278         }               
279 }
280 /***********************************************************/
281 void MatrixOutputCommand::printSims(ostream& out) {
282         try {
283                 
284                 //output column headers
285                 out << simMatrix.size() << endl;
286                 
287                 for (int m = 0; m < simMatrix.size(); m++)      {
288                         out << lookup[m]->getGroup() << '\t';
289                         for (int n = 0; n < m; n++)     {
290                                 out << simMatrix[m][n] << '\t'; 
291                         }
292                         out << endl;
293                 }
294
295         }
296         catch(exception& e) {
297                 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
298                 exit(1);
299         }
300         catch(...) {
301                 cout << "An unknown error has occurred in the MatrixOutputCommand class function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
302                 exit(1);
303         }               
304 }
305 /***********************************************************/
306 void MatrixOutputCommand::process(vector<SharedRAbundVector*> thisLookup){
307         try {
308         
309                                 EstOutput data;
310                                 vector<SharedRAbundVector*> subset;
311
312                                 //for each calculator                                                                                           
313                                 for(int i = 0 ; i < matrixCalculators.size(); i++) {
314                                         
315                                         //initialize simMatrix
316                                         simMatrix.clear();
317                                         simMatrix.resize(numGroups);
318                                         for (int m = 0; m < simMatrix.size(); m++)      {
319                                                 for (int j = 0; j < simMatrix.size(); j++)      {
320                                                         simMatrix[m].push_back(0.0);
321                                                 }
322                                         }
323                                 
324                                         for (int k = 0; k < thisLookup.size(); k++) { 
325                                                 for (int l = k; l < thisLookup.size(); l++) {
326                                                         if (k != l) { //we dont need to similiarity of a groups to itself
327                                                                 //get estimated similarity between 2 groups
328                                                                 
329                                                                 subset.clear(); //clear out old pair of sharedrabunds
330                                                                 //add new pair of sharedrabunds
331                                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
332                                                                 
333                                                                 data = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
334                                                                 //save values in similarity matrix
335                                                                 simMatrix[k][l] = 1.0 - data[0];  //convert similiarity to distance
336                                                                 simMatrix[l][k] = 1.0 - data[0];  //convert similiarity to distance
337                                                         }
338                                                 }
339                                         }
340                                         
341                                         exportFileName = getRootName(globaldata->inputFileName) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
342                                         openOutputFile(exportFileName, out);
343                                         printSims(out);
344                                         out.close();
345                                         
346                                 }
347
348         
349                 
350         }
351         catch(exception& e) {
352                 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
353                 exit(1);
354         }
355         catch(...) {
356                 cout << "An unknown error has occurred in the MatrixOutputCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
357                 exit(1);
358         }               
359 }
360 /***********************************************************/
361
362
363