]> git.donarmstrong.com Git - mothur.git/blob - matrixoutputcommand.cpp
fixed some bugs
[mothur.git] / matrixoutputcommand.cpp
1 /*
2  *  matrixoutputcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/20/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "matrixoutputcommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
21
22
23 //**********************************************************************************************************************
24
25 MatrixOutputCommand::MatrixOutputCommand(string option){
26         try {
27                 globaldata = GlobalData::getInstance();
28                 abort = false;
29                 allLines = 1;
30                 lines.clear();
31                 labels.clear();
32                 Groups.clear();
33                 Estimators.clear();
34                 
35                 //allow user to run help
36                 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
37                 
38                 else {
39                         //valid paramters for this command
40                         string Array[] =  {"line","label","calc","groups"};
41                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
42                         
43                         OptionParser parser(option);
44                         map<string,string> parameters  = parser.getParameters();
45                         
46                         ValidParameters validParameter;
47                 
48                         //check to make sure all parameters are valid for command
49                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
50                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
51                         }
52                         
53                         //make sure the user has already run the read.otu command
54                         if (globaldata->getSharedFile() == "") {
55                                 if (globaldata->getListFile() == "") { cout << "You must read a list and a group, or a shared before you can use the dist.shared command." << endl; abort = true; }
56                                 else if (globaldata->getGroupFile() == "") { cout << "You must read a list and a group, or a shared before you can use the dist.shared command." << endl; abort = true; }
57                         }
58                         
59                         //check for optional parameter and set defaults
60                         // ...at some point should added some additional type checking...
61                         line = validParameter.validFile(parameters, "line", false);                             
62                         if (line == "not found") { line = "";  }
63                         else { 
64                                 if(line != "all") {  splitAtDash(line, lines);  allLines = 0;  }
65                                 else { allLines = 1;  }
66                         }
67                         
68                         label = validParameter.validFile(parameters, "label", false);                   
69                         if (label == "not found") { label = ""; }
70                         else { 
71                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
72                                 else { allLines = 1;  }
73                         }
74                         
75                         //make sure user did not use both the line and label parameters
76                         if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. " << endl; abort = true; }
77                         //if the user has not specified any line or labels use the ones from read.otu
78                         else if((line == "") && (label == "")) {  
79                                 allLines = globaldata->allLines; 
80                                 labels = globaldata->labels; 
81                                 lines = globaldata->lines;
82                         }
83                                 
84                         groups = validParameter.validFile(parameters, "groups", false);                 
85                         if (groups == "not found") { groups = ""; }
86                         else { 
87                                 splitAtDash(groups, Groups);
88                                 globaldata->Groups = Groups;
89                         }
90                                 
91                         calc = validParameter.validFile(parameters, "calc", false);                     
92                         if (calc == "not found") { calc = "jclass-thetayc";  }
93                         else { 
94                                  if (calc == "default")  {  calc = "jclass-thetayc";  }
95                         }
96                         splitAtDash(calc, Estimators);
97
98                         if (abort == false) {
99                         
100                                 validCalculator = new ValidCalculators();
101                                 
102                                 int i;
103                                 for (i=0; i<Estimators.size(); i++) {
104                                         if (validCalculator->isValidCalculator("matrix", Estimators[i]) == true) { 
105                                                 if (Estimators[i] == "jabund") {        
106                                                         matrixCalculators.push_back(new JAbund());
107                                                 }else if (Estimators[i] == "sorabund") { 
108                                                         matrixCalculators.push_back(new SorAbund());
109                                                 }else if (Estimators[i] == "jclass") { 
110                                                         matrixCalculators.push_back(new Jclass());
111                                                 }else if (Estimators[i] == "sorclass") { 
112                                                         matrixCalculators.push_back(new SorClass());
113                                                 }else if (Estimators[i] == "jest") { 
114                                                         matrixCalculators.push_back(new Jest());
115                                                 }else if (Estimators[i] == "sorest") { 
116                                                         matrixCalculators.push_back(new SorEst());
117                                                 }else if (Estimators[i] == "thetayc") { 
118                                                         matrixCalculators.push_back(new ThetaYC());
119                                                 }else if (Estimators[i] == "thetan") { 
120                                                         matrixCalculators.push_back(new ThetaN());
121                                                 }else if (Estimators[i] == "morisitahorn") { 
122                                                         matrixCalculators.push_back(new MorHorn());
123                                                 }else if (Estimators[i] == "braycurtis") { 
124                                                         matrixCalculators.push_back(new BrayCurtis());
125                                                 }
126                                         }
127                                 }
128                                 
129                         }
130                 }
131                 
132         }
133         catch(exception& e) {
134                 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function MatrixOutputCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
135                 exit(1);
136         }
137         catch(...) {
138                 cout << "An unknown error has occurred in the MatrixOutputCommand class function MatrixOutputCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
139                 exit(1);
140         }       
141 }
142
143 //**********************************************************************************************************************
144
145 void MatrixOutputCommand::help(){
146         try {
147                 cout << "The dist.shared command can only be executed after a successful read.otu command." << "\n";
148                 cout << "The dist.shared command parameters are groups, calc, line and label.  The calc parameter is required, and you may not use line and label at the same time." << "\n";
149                 cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included used." << "\n";
150                 cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like distance matrices created for, and are also separated by dashes." << "\n";
151                 cout << "The dist.shared command should be in the following format: dist.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels)." << "\n";
152                 cout << "Example dist.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund)." << "\n";
153                 cout << "The default value for groups is all the groups in your groupfile." << "\n";
154                 cout << "The default value for calc is jclass and thetayc." << "\n";
155                 validCalculator->printCalc("matrix", cout);
156                 cout << "The dist.shared command outputs a .dist file for each calculator you specify at each distance you choose." << "\n";
157                 cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n";
158         }
159         catch(exception& e) {
160                 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
161                 exit(1);
162         }
163         catch(...) {
164                 cout << "An unknown error has occurred in the MatrixOutputCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
165                 exit(1);
166         }       
167 }
168
169
170 //**********************************************************************************************************************
171
172 MatrixOutputCommand::~MatrixOutputCommand(){
173         if (abort == false) {
174                 delete input; globaldata->ginput = NULL;
175                 delete read;
176                 delete validCalculator;
177         }
178 }
179
180 //**********************************************************************************************************************
181
182 int MatrixOutputCommand::execute(){
183         try {
184                 
185                 if (abort == true) {    return 0;       }
186         
187                 int count = 1;  
188                                 
189                 //if the users entered no valid calculators don't execute command
190                 if (matrixCalculators.size() == 0) { cout << "No valid calculators." << endl; return 0; }
191
192                 //you have groups
193                 read = new ReadOTUFile(globaldata->inputFileName);      
194                 read->read(&*globaldata); 
195                         
196                 input = globaldata->ginput;
197                 lookup = input->getSharedRAbundVectors();
198                 vector<SharedRAbundVector*> lastLookup = lookup;
199                 
200                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
201                 set<string> processedLabels;
202                 set<string> userLabels = labels;
203                 set<int> userLines = lines;
204                                 
205                 if (lookup.size() < 2) { cout << "You have not provided enough valid groups.  I cannot run the command." << endl; return 0;}
206                 
207                 numGroups = lookup.size();
208                                 
209                 //as long as you are not at the end of the file or done wih the lines you want
210                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
211                 
212                         if(allLines == 1 || lines.count(count) == 1 || labels.count(lookup[0]->getLabel()) == 1){                       
213                                 cout << lookup[0]->getLabel() << '\t' << count << endl;
214                                 process(lookup);
215                                 
216                                 processedLabels.insert(lookup[0]->getLabel());
217                                 userLabels.erase(lookup[0]->getLabel());
218                                 userLines.erase(count);
219                         }
220                         
221                         if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLookup[0]->getLabel()) != 1)) {
222                                 cout << lastLookup[0]->getLabel() << '\t' << count << endl;
223                                 process(lastLookup);
224                                 
225                                 processedLabels.insert(lastLookup[0]->getLabel());
226                                 userLabels.erase(lastLookup[0]->getLabel());
227                         }
228
229                         //prevent memory leak
230                         if (count != 1) { for (int i = 0; i < lastLookup.size(); i++) {  delete lastLookup[i];  } }
231                         lastLookup = lookup;                    
232                         
233                         //get next line to process
234                         lookup = input->getSharedRAbundVectors();
235                         count++;
236                 }
237                 
238                 //output error messages about any remaining user labels
239                 set<string>::iterator it;
240                 bool needToRun = false;
241                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
242                         cout << "Your file does not include the label "<< *it; 
243                         if (processedLabels.count(lastLookup[0]->getLabel()) != 1) {
244                                 cout << ". I will use " << lastLookup[0]->getLabel() << "." << endl;
245                                 needToRun = true;
246                         }else {
247                                 cout << ". Please refer to " << lastLookup[0]->getLabel() << "." << endl;
248                         }
249                 }
250                 
251                 //run last line if you need to
252                 if (needToRun == true)  {
253                         cout << lastLookup[0]->getLabel() << '\t' << count << endl;
254                         process(lastLookup);            
255                 }
256                 
257                 for (int i = 0; i < lastLookup.size(); i++) {  delete lastLookup[i];  }
258
259                 
260                 //reset groups parameter
261                 globaldata->Groups.clear();  
262
263                 return 0;
264         }
265         catch(exception& e) {
266                 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
267                 exit(1);
268         }
269         catch(...) {
270                 cout << "An unknown error has occurred in the MatrixOutputCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
271                 exit(1);
272         }               
273 }
274 /***********************************************************/
275 void MatrixOutputCommand::printSims(ostream& out) {
276         try {
277                 
278                 //output column headers
279                 out << simMatrix.size() << endl;
280                 
281                 for (int m = 0; m < simMatrix.size(); m++)      {
282                         out << lookup[m]->getGroup() << '\t';
283                         for (int n = 0; n < m; n++)     {
284                                 out << simMatrix[m][n] << '\t'; 
285                         }
286                         out << endl;
287                 }
288
289         }
290         catch(exception& e) {
291                 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
292                 exit(1);
293         }
294         catch(...) {
295                 cout << "An unknown error has occurred in the MatrixOutputCommand class function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
296                 exit(1);
297         }               
298 }
299 /***********************************************************/
300 void MatrixOutputCommand::process(vector<SharedRAbundVector*> thisLookup){
301         try {
302         
303                                 EstOutput data;
304                                 vector<SharedRAbundVector*> subset;
305
306                                 //for each calculator                                                                                           
307                                 for(int i = 0 ; i < matrixCalculators.size(); i++) {
308                                         
309                                         //initialize simMatrix
310                                         simMatrix.clear();
311                                         simMatrix.resize(numGroups);
312                                         for (int m = 0; m < simMatrix.size(); m++)      {
313                                                 for (int j = 0; j < simMatrix.size(); j++)      {
314                                                         simMatrix[m].push_back(0.0);
315                                                 }
316                                         }
317                                 
318                                         for (int k = 0; k < thisLookup.size(); k++) { 
319                                                 for (int l = k; l < thisLookup.size(); l++) {
320                                                         if (k != l) { //we dont need to similiarity of a groups to itself
321                                                                 //get estimated similarity between 2 groups
322                                                                 
323                                                                 subset.clear(); //clear out old pair of sharedrabunds
324                                                                 //add new pair of sharedrabunds
325                                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
326                                                                 
327                                                                 data = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
328                                                                 //save values in similarity matrix
329                                                                 simMatrix[k][l] = 1.0 - data[0];  //convert similiarity to distance
330                                                                 simMatrix[l][k] = 1.0 - data[0];  //convert similiarity to distance
331                                                         }
332                                                 }
333                                         }
334                                         
335                                         exportFileName = getRootName(globaldata->inputFileName) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
336                                         openOutputFile(exportFileName, out);
337                                         printSims(out);
338                                         out.close();
339                                         
340                                 }
341
342         
343                 
344         }
345         catch(exception& e) {
346                 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
347                 exit(1);
348         }
349         catch(...) {
350                 cout << "An unknown error has occurred in the MatrixOutputCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
351                 exit(1);
352         }               
353 }
354 /***********************************************************/
355
356
357