]> git.donarmstrong.com Git - mothur.git/blob - matrixoutputcommand.cpp
fixes while testing
[mothur.git] / matrixoutputcommand.cpp
1 /*
2  *  matrixoutputcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/20/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "matrixoutputcommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
21
22 //**********************************************************************************************************************
23 vector<string> MatrixOutputCommand::getValidParameters(){       
24         try {
25                 string Array[] =  {"label","calc","groups","outputdir","inputdir", "output"};
26                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "MatrixOutputCommand", "getValidParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 MatrixOutputCommand::MatrixOutputCommand(){     
36         try {
37                 abort = true;
38                 //initialize outputTypes
39                 vector<string> tempOutNames;
40                 outputTypes["phylip"] = tempOutNames;
41         }
42         catch(exception& e) {
43                 m->errorOut(e, "MatrixOutputCommand", "MatrixOutputCommand");
44                 exit(1);
45         }
46 }
47 //**********************************************************************************************************************
48 vector<string> MatrixOutputCommand::getRequiredParameters(){    
49         try {
50                 vector<string> myArray;
51                 return myArray;
52         }
53         catch(exception& e) {
54                 m->errorOut(e, "MatrixOutputCommand", "getRequiredParameters");
55                 exit(1);
56         }
57 }
58 //**********************************************************************************************************************
59 vector<string> MatrixOutputCommand::getRequiredFiles(){ 
60         try {
61                 string Array[] =  {"shared"};
62                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
63                 return myArray;
64         }
65         catch(exception& e) {
66                 m->errorOut(e, "MatrixOutputCommand", "getRequiredFiles");
67                 exit(1);
68         }
69 }
70 //**********************************************************************************************************************
71
72 MatrixOutputCommand::MatrixOutputCommand(string option)  {
73         try {
74                 globaldata = GlobalData::getInstance();
75                 abort = false;
76                 allLines = 1;
77                 labels.clear();
78                 Groups.clear();
79                 Estimators.clear();
80                 
81                 //allow user to run help
82                 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
83                 
84                 else {
85                         //valid paramters for this command
86                         string Array[] =  {"label","calc","groups","outputdir","inputdir", "output"};
87                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
88                         
89                         OptionParser parser(option);
90                         map<string,string> parameters  = parser.getParameters();
91                         
92                         ValidParameters validParameter;
93                 
94                         //check to make sure all parameters are valid for command
95                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
96                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
97                         }
98                         
99                         //initialize outputTypes
100                         vector<string> tempOutNames;
101                         outputTypes["phylip"] = tempOutNames;
102                         
103                         //if the user changes the output directory command factory will send this info to us in the output parameter 
104                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
105                                 outputDir = ""; 
106                                 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it       
107                         }
108                         
109                         //make sure the user has already run the read.otu command
110                         if (globaldata->getSharedFile() == "") {
111                                 if (globaldata->getListFile() == "") { m->mothurOut("You must read a list and a group, or a shared before you can use the dist.shared command."); m->mothurOutEndLine(); abort = true; }
112                                 else if (globaldata->getGroupFile() == "") { m->mothurOut("You must read a list and a group, or a shared before you can use the dist.shared command."); m->mothurOutEndLine(); abort = true; }
113                         }
114                         
115                         //check for optional parameter and set defaults
116                         // ...at some point should added some additional type checking...
117                         label = validParameter.validFile(parameters, "label", false);                   
118                         if (label == "not found") { label = ""; }
119                         else { 
120                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
121                                 else { allLines = 1;  }
122                         }
123                         
124                         output = validParameter.validFile(parameters, "output", false);         if(output == "not found"){      output = "lt"; }
125                         if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
126                         
127                         //if the user has not specified any labels use the ones from read.otu
128                         if (label == "") {  
129                                 allLines = globaldata->allLines; 
130                                 labels = globaldata->labels; 
131                         }
132                                 
133                         groups = validParameter.validFile(parameters, "groups", false);                 
134                         if (groups == "not found") { groups = ""; }
135                         else { 
136                                 m->splitAtDash(groups, Groups);
137                                 globaldata->Groups = Groups;
138                         }
139                                 
140                         calc = validParameter.validFile(parameters, "calc", false);                     
141                         if (calc == "not found") { calc = "jclass-thetayc";  }
142                         else { 
143                                  if (calc == "default")  {  calc = "jclass-thetayc";  }
144                         }
145                         m->splitAtDash(calc, Estimators);
146
147                         if (abort == false) {
148                         
149                                 validCalculator = new ValidCalculators();
150                                 
151                                 int i;
152                                 for (i=0; i<Estimators.size(); i++) {
153                                         if (validCalculator->isValidCalculator("matrix", Estimators[i]) == true) { 
154                                                 if (Estimators[i] == "jabund") {        
155                                                         matrixCalculators.push_back(new JAbund());
156                                                 }else if (Estimators[i] == "sorabund") { 
157                                                         matrixCalculators.push_back(new SorAbund());
158                                                 }else if (Estimators[i] == "jclass") { 
159                                                         matrixCalculators.push_back(new Jclass());
160                                                 }else if (Estimators[i] == "sorclass") { 
161                                                         matrixCalculators.push_back(new SorClass());
162                                                 }else if (Estimators[i] == "jest") { 
163                                                         matrixCalculators.push_back(new Jest());
164                                                 }else if (Estimators[i] == "sorest") { 
165                                                         matrixCalculators.push_back(new SorEst());
166                                                 }else if (Estimators[i] == "thetayc") { 
167                                                         matrixCalculators.push_back(new ThetaYC());
168                                                 }else if (Estimators[i] == "thetan") { 
169                                                         matrixCalculators.push_back(new ThetaN());
170                                                 }else if (Estimators[i] == "morisitahorn") { 
171                                                         matrixCalculators.push_back(new MorHorn());
172                                                 }else if (Estimators[i] == "braycurtis") { 
173                                                         matrixCalculators.push_back(new BrayCurtis());
174                                                 }
175                                         }
176                                 }
177                                 
178                         }
179                 }
180                 
181         }
182         catch(exception& e) {
183                 m->errorOut(e, "MatrixOutputCommand", "MatrixOutputCommand");
184                 exit(1);
185         }
186 }
187
188 //**********************************************************************************************************************
189
190 void MatrixOutputCommand::help(){
191         try {
192                 m->mothurOut("The dist.shared command can only be executed after a successful read.otu command.\n");
193                 m->mothurOut("The dist.shared command parameters are groups, calc, output and label.  No parameters are required.\n");
194                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n");
195                 m->mothurOut("The group names are separated by dashes. The label parameter allows you to select what distance levels you would like distance matrices created for, and is also separated by dashes.\n");
196                 m->mothurOut("The dist.shared command should be in the following format: dist.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels).\n");
197                 m->mothurOut("The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n");
198                 m->mothurOut("Example dist.shared(groups=A-B-C, calc=jabund-sorabund).\n");
199                 m->mothurOut("The default value for groups is all the groups in your groupfile.\n");
200                 m->mothurOut("The default value for calc is jclass and thetayc.\n");
201                 validCalculator->printCalc("matrix", cout);
202                 m->mothurOut("The dist.shared command outputs a .dist file for each calculator you specify at each distance you choose.\n");
203                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
204         }
205         catch(exception& e) {
206                 m->errorOut(e, "MatrixOutputCommand", "help");
207                 exit(1);
208         }
209 }
210
211
212 //**********************************************************************************************************************
213
214 MatrixOutputCommand::~MatrixOutputCommand(){
215         if (abort == false) {
216                 delete input; globaldata->ginput = NULL;
217                 delete read;
218                 delete validCalculator;
219         }
220 }
221
222 //**********************************************************************************************************************
223
224 int MatrixOutputCommand::execute(){
225         try {
226                 
227                 if (abort == true) {    return 0;       }
228                         
229                 //if the users entered no valid calculators don't execute command
230                 if (matrixCalculators.size() == 0) { m->mothurOut("No valid calculators."); m->mothurOutEndLine();  return 0; }
231
232                 //you have groups
233                 read = new ReadOTUFile(globaldata->inputFileName);      
234                 read->read(&*globaldata); 
235                         
236                 input = globaldata->ginput;
237                 lookup = input->getSharedRAbundVectors();
238                 string lastLabel = lookup[0]->getLabel();
239                 
240                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
241                 set<string> processedLabels;
242                 set<string> userLabels = labels;
243                                         
244                 if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); return 0;}
245                 
246                 numGroups = lookup.size();
247                 
248                 if (m->control_pressed) { delete read; delete input; globaldata->ginput = NULL; for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } globaldata->Groups.clear(); return 0;  }
249                                 
250                 //as long as you are not at the end of the file or done wih the lines you want
251                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
252                 
253                         if (m->control_pressed) { outputTypes.clear(); delete read; delete input; globaldata->ginput = NULL; for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } globaldata->Groups.clear(); return 0;  }
254                 
255                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
256                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
257                                 process(lookup);
258                                 
259                                 processedLabels.insert(lookup[0]->getLabel());
260                                 userLabels.erase(lookup[0]->getLabel());
261                         }
262                         
263                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
264                                 string saveLabel = lookup[0]->getLabel();
265                                 
266                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
267                                 lookup = input->getSharedRAbundVectors(lastLabel);
268
269                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
270                                 process(lookup);
271                                 
272                                 processedLabels.insert(lookup[0]->getLabel());
273                                 userLabels.erase(lookup[0]->getLabel());
274                                 
275                                 //restore real lastlabel to save below
276                                 lookup[0]->setLabel(saveLabel);
277                         }
278
279                         lastLabel = lookup[0]->getLabel();                      
280                         
281                         //get next line to process
282                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
283                         lookup = input->getSharedRAbundVectors();
284                 }
285                 
286                 if (m->control_pressed) { outputTypes.clear(); delete read; delete input; globaldata->ginput = NULL; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } globaldata->Groups.clear(); return 0;  }
287
288                 //output error messages about any remaining user labels
289                 set<string>::iterator it;
290                 bool needToRun = false;
291                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
292                         m->mothurOut("Your file does not include the label " + *it);  
293                         if (processedLabels.count(lastLabel) != 1) {
294                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
295                                 needToRun = true;
296                         }else {
297                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
298                         }
299                 }
300                 
301                 if (m->control_pressed) { outputTypes.clear(); delete read; delete input; globaldata->ginput = NULL;  for (int i = 0; i < outputNames.size(); i++) {    remove(outputNames[i].c_str()); } globaldata->Groups.clear(); return 0;  }
302
303                 //run last label if you need to
304                 if (needToRun == true)  {
305                         for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {  delete lookup[i]; }  } 
306                         lookup = input->getSharedRAbundVectors(lastLabel);
307
308                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
309                         process(lookup);
310                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
311                 }
312                 
313                 if (m->control_pressed) { outputTypes.clear();  delete read; delete input; globaldata->ginput = NULL;  for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str()); } globaldata->Groups.clear(); return 0;  }
314                 
315                 //reset groups parameter
316                 globaldata->Groups.clear();  
317                 
318                 m->mothurOutEndLine();
319                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
320                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
321                 m->mothurOutEndLine();
322
323
324                 return 0;
325         }
326         catch(exception& e) {
327                 m->errorOut(e, "MatrixOutputCommand", "execute");
328                 exit(1);
329         }
330 }
331 /***********************************************************/
332 void MatrixOutputCommand::printSims(ostream& out) {
333         try {
334                 
335                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
336                 
337                 //output num seqs
338                 out << simMatrix.size() << endl;
339                 
340                 if (output == "lt") {
341                         for (int m = 0; m < simMatrix.size(); m++)      {
342                                 out << lookup[m]->getGroup() << '\t';
343                                 for (int n = 0; n < m; n++)     {
344                                         out << simMatrix[m][n] << '\t'; 
345                                 }
346                                 out << endl;
347                         }
348                 }else{
349                         for (int m = 0; m < simMatrix.size(); m++)      {
350                                 out << lookup[m]->getGroup() << '\t';
351                                 for (int n = 0; n < simMatrix[m].size(); n++)   {
352                                         out << simMatrix[m][n] << '\t'; 
353                                 }
354                                 out << endl;
355                         }
356                 }
357         }
358         catch(exception& e) {
359                 m->errorOut(e, "MatrixOutputCommand", "printSims");
360                 exit(1);
361         }
362 }
363 /***********************************************************/
364 int MatrixOutputCommand::process(vector<SharedRAbundVector*> thisLookup){
365         try {
366         
367                                 EstOutput data;
368                                 vector<SharedRAbundVector*> subset;
369
370                                 //for each calculator                                                                                           
371                                 for(int i = 0 ; i < matrixCalculators.size(); i++) {
372                                         
373                                         //initialize simMatrix
374                                         simMatrix.clear();
375                                         simMatrix.resize(numGroups);
376                                         for (int p = 0; p < simMatrix.size(); p++)      {
377                                                 for (int j = 0; j < simMatrix.size(); j++)      {
378                                                         simMatrix[p].push_back(0.0);
379                                                 }
380                                         }
381                                 
382                                         for (int k = 0; k < thisLookup.size(); k++) { 
383                                                 for (int l = k; l < thisLookup.size(); l++) {
384                                                         if (k != l) { //we dont need to similiarity of a groups to itself
385                                                                 //get estimated similarity between 2 groups
386                                                                 
387                                                                 if (m->control_pressed) { return 0; }
388                                                                 
389                                                                 subset.clear(); //clear out old pair of sharedrabunds
390                                                                 //add new pair of sharedrabunds
391                                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
392                                                                 
393                                                                 data = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
394                                                                 //save values in similarity matrix
395                                                                 simMatrix[k][l] = 1.0 - data[0];  //convert similiarity to distance
396                                                                 simMatrix[l][k] = 1.0 - data[0];  //convert similiarity to distance
397                                                         }
398                                                 }
399                                         }
400                                         
401                                         exportFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".dist";
402                                         m->openOutputFile(exportFileName, out);
403                                         outputNames.push_back(exportFileName); outputTypes["phylip"].push_back(exportFileName);
404                                         
405                                         printSims(out);
406                                         out.close();
407                                         
408                                 }
409
410                                 return 0;
411                 
412         }
413         catch(exception& e) {
414                 m->errorOut(e, "MatrixOutputCommand", "process");
415                 exit(1);
416         }
417 }
418 /***********************************************************/
419
420
421