]> git.donarmstrong.com Git - mothur.git/blob - matrixoutputcommand.cpp
remove.rare command
[mothur.git] / matrixoutputcommand.cpp
1 /*
2  *  matrixoutputcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/20/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "matrixoutputcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharednseqs.h"
15 #include "sharedjabund.h"
16 #include "sharedsorabund.h"
17 #include "sharedjclass.h"
18 #include "sharedsorclass.h"
19 #include "sharedjest.h"
20 #include "sharedsorest.h"
21 #include "sharedthetayc.h"
22 #include "sharedthetan.h"
23 #include "sharedkstest.h"
24 #include "whittaker.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
34 #include "odum.h"
35 #include "canberra.h"
36 #include "structeuclidean.h"
37 #include "structchord.h"
38 #include "hellinger.h"
39 #include "manhattan.h"
40 #include "structpearson.h"
41 #include "soergel.h"
42 #include "spearman.h"
43 #include "structkulczynski.h"
44 #include "structchi2.h"
45 #include "speciesprofile.h"
46 #include "hamming.h"
47 #include "gower.h"
48 #include "memchi2.h"
49 #include "memchord.h"
50 #include "memeuclidean.h"
51 #include "mempearson.h"
52 //**********************************************************************************************************************
53 vector<string> MatrixOutputCommand::getValidParameters(){       
54         try {
55                 string Array[] =  {"label","calc","groups","outputdir","inputdir", "output"};
56                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
57                 return myArray;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "MatrixOutputCommand", "getValidParameters");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 MatrixOutputCommand::MatrixOutputCommand(){     
66         try {
67                 abort = true;
68                 //initialize outputTypes
69                 vector<string> tempOutNames;
70                 outputTypes["phylip"] = tempOutNames;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "MatrixOutputCommand", "MatrixOutputCommand");
74                 exit(1);
75         }
76 }
77 //**********************************************************************************************************************
78 vector<string> MatrixOutputCommand::getRequiredParameters(){    
79         try {
80                 vector<string> myArray;
81                 return myArray;
82         }
83         catch(exception& e) {
84                 m->errorOut(e, "MatrixOutputCommand", "getRequiredParameters");
85                 exit(1);
86         }
87 }
88 //**********************************************************************************************************************
89 vector<string> MatrixOutputCommand::getRequiredFiles(){ 
90         try {
91                 string Array[] =  {"shared"};
92                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
93                 return myArray;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "MatrixOutputCommand", "getRequiredFiles");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101
102 MatrixOutputCommand::MatrixOutputCommand(string option)  {
103         try {
104                 globaldata = GlobalData::getInstance();
105                 abort = false;
106                 allLines = 1;
107                 labels.clear();
108                 Groups.clear();
109                 Estimators.clear();
110                 
111                 //allow user to run help
112                 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
113                 
114                 else {
115                         //valid paramters for this command
116                         string Array[] =  {"label","calc","groups","outputdir","inputdir", "output"};
117                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
118                         
119                         OptionParser parser(option);
120                         map<string,string> parameters  = parser.getParameters();
121                         
122                         ValidParameters validParameter;
123                 
124                         //check to make sure all parameters are valid for command
125                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
126                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
127                         }
128                         
129                         //initialize outputTypes
130                         vector<string> tempOutNames;
131                         outputTypes["phylip"] = tempOutNames;
132                         
133                         //if the user changes the output directory command factory will send this info to us in the output parameter 
134                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
135                                 outputDir = ""; 
136                                 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it       
137                         }
138                         
139                         //make sure the user has already run the read.otu command
140                         if (globaldata->getSharedFile() == "") {
141                                 if (globaldata->getListFile() == "") { m->mothurOut("You must read a list and a group, or a shared before you can use the dist.shared command."); m->mothurOutEndLine(); abort = true; }
142                                 else if (globaldata->getGroupFile() == "") { m->mothurOut("You must read a list and a group, or a shared before you can use the dist.shared command."); m->mothurOutEndLine(); abort = true; }
143                         }
144                         
145                         //check for optional parameter and set defaults
146                         // ...at some point should added some additional type checking...
147                         label = validParameter.validFile(parameters, "label", false);                   
148                         if (label == "not found") { label = ""; }
149                         else { 
150                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
151                                 else { allLines = 1;  }
152                         }
153                         
154                         output = validParameter.validFile(parameters, "output", false);         if(output == "not found"){      output = "lt"; }
155                         if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
156                         
157                         //if the user has not specified any labels use the ones from read.otu
158                         if (label == "") {  
159                                 allLines = globaldata->allLines; 
160                                 labels = globaldata->labels; 
161                         }
162                                 
163                         groups = validParameter.validFile(parameters, "groups", false);                 
164                         if (groups == "not found") { groups = ""; }
165                         else { 
166                                 m->splitAtDash(groups, Groups);
167                                 globaldata->Groups = Groups;
168                         }
169                                 
170                         calc = validParameter.validFile(parameters, "calc", false);                     
171                         if (calc == "not found") { calc = "jclass-thetayc";  }
172                         else { 
173                                  if (calc == "default")  {  calc = "jclass-thetayc";  }
174                         }
175                         m->splitAtDash(calc, Estimators);
176
177                         if (abort == false) {
178                         
179                                 validCalculator = new ValidCalculators();
180                                 
181                                 int i;
182                                 for (i=0; i<Estimators.size(); i++) {
183                                         if (validCalculator->isValidCalculator("matrix", Estimators[i]) == true) { 
184                                                 if (Estimators[i] == "sharedsobs") { 
185                                                         matrixCalculators.push_back(new SharedSobsCS());
186                                                 }else if (Estimators[i] == "sharedchao") { 
187                                                         matrixCalculators.push_back(new SharedChao1());
188                                                 }else if (Estimators[i] == "sharedace") { 
189                                                         matrixCalculators.push_back(new SharedAce());
190                                                 }else if (Estimators[i] == "jabund") {  
191                                                         matrixCalculators.push_back(new JAbund());
192                                                 }else if (Estimators[i] == "sorabund") { 
193                                                         matrixCalculators.push_back(new SorAbund());
194                                                 }else if (Estimators[i] == "jclass") { 
195                                                         matrixCalculators.push_back(new Jclass());
196                                                 }else if (Estimators[i] == "sorclass") { 
197                                                         matrixCalculators.push_back(new SorClass());
198                                                 }else if (Estimators[i] == "jest") { 
199                                                         matrixCalculators.push_back(new Jest());
200                                                 }else if (Estimators[i] == "sorest") { 
201                                                         matrixCalculators.push_back(new SorEst());
202                                                 }else if (Estimators[i] == "thetayc") { 
203                                                         matrixCalculators.push_back(new ThetaYC());
204                                                 }else if (Estimators[i] == "thetan") { 
205                                                         matrixCalculators.push_back(new ThetaN());
206                                                 }else if (Estimators[i] == "kstest") { 
207                                                         matrixCalculators.push_back(new KSTest());
208                                                 }else if (Estimators[i] == "sharednseqs") { 
209                                                         matrixCalculators.push_back(new SharedNSeqs());
210                                                 }else if (Estimators[i] == "ochiai") { 
211                                                         matrixCalculators.push_back(new Ochiai());
212                                                 }else if (Estimators[i] == "anderberg") { 
213                                                         matrixCalculators.push_back(new Anderberg());
214                                                 }else if (Estimators[i] == "kulczynski") { 
215                                                         matrixCalculators.push_back(new Kulczynski());
216                                                 }else if (Estimators[i] == "kulczynskicody") { 
217                                                         matrixCalculators.push_back(new KulczynskiCody());
218                                                 }else if (Estimators[i] == "lennon") { 
219                                                         matrixCalculators.push_back(new Lennon());
220                                                 }else if (Estimators[i] == "morisitahorn") { 
221                                                         matrixCalculators.push_back(new MorHorn());
222                                                 }else if (Estimators[i] == "braycurtis") { 
223                                                         matrixCalculators.push_back(new BrayCurtis());
224                                                 }else if (Estimators[i] == "whittaker") { 
225                                                         matrixCalculators.push_back(new Whittaker());
226                                                 }else if (Estimators[i] == "odum") { 
227                                                         matrixCalculators.push_back(new Odum());
228                                                 }else if (Estimators[i] == "canberra") { 
229                                                         matrixCalculators.push_back(new Canberra());
230                                                 }else if (Estimators[i] == "structeuclidean") { 
231                                                         matrixCalculators.push_back(new StructEuclidean());
232                                                 }else if (Estimators[i] == "structchord") { 
233                                                         matrixCalculators.push_back(new StructChord());
234                                                 }else if (Estimators[i] == "hellinger") { 
235                                                         matrixCalculators.push_back(new Hellinger());
236                                                 }else if (Estimators[i] == "manhattan") { 
237                                                         matrixCalculators.push_back(new Manhattan());
238                                                 }else if (Estimators[i] == "structpearson") { 
239                                                         matrixCalculators.push_back(new StructPearson());
240                                                 }else if (Estimators[i] == "soergel") { 
241                                                         matrixCalculators.push_back(new Soergel());
242                                                 }else if (Estimators[i] == "spearman") { 
243                                                         matrixCalculators.push_back(new Spearman());
244                                                 }else if (Estimators[i] == "structkulczynski") { 
245                                                         matrixCalculators.push_back(new StructKulczynski());
246                                                 }else if (Estimators[i] == "speciesprofile") { 
247                                                         matrixCalculators.push_back(new SpeciesProfile());
248                                                 }else if (Estimators[i] == "hamming") { 
249                                                         matrixCalculators.push_back(new Hamming());
250                                                 }else if (Estimators[i] == "structchi2") { 
251                                                         matrixCalculators.push_back(new StructChi2());
252                                                 }else if (Estimators[i] == "gower") { 
253                                                         matrixCalculators.push_back(new Gower());
254                                                 }else if (Estimators[i] == "memchi2") { 
255                                                         matrixCalculators.push_back(new MemChi2());
256                                                 }else if (Estimators[i] == "memchord") { 
257                                                         matrixCalculators.push_back(new MemChord());
258                                                 }else if (Estimators[i] == "memeuclidean") { 
259                                                         matrixCalculators.push_back(new MemEuclidean());
260                                                 }else if (Estimators[i] == "mempearson") { 
261                                                         matrixCalculators.push_back(new MemPearson());
262                                                 }
263                                         }
264                                 }
265                                 
266                         }
267                 }
268                 
269         }
270         catch(exception& e) {
271                 m->errorOut(e, "MatrixOutputCommand", "MatrixOutputCommand");
272                 exit(1);
273         }
274 }
275
276 //**********************************************************************************************************************
277
278 void MatrixOutputCommand::help(){
279         try {
280                 m->mothurOut("The dist.shared command can only be executed after a successful read.otu command.\n");
281                 m->mothurOut("The dist.shared command parameters are groups, calc, output and label.  No parameters are required.\n");
282                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n");
283                 m->mothurOut("The group names are separated by dashes. The label parameter allows you to select what distance levels you would like distance matrices created for, and is also separated by dashes.\n");
284                 m->mothurOut("The dist.shared command should be in the following format: dist.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels).\n");
285                 m->mothurOut("The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n");
286                 m->mothurOut("Example dist.shared(groups=A-B-C, calc=jabund-sorabund).\n");
287                 m->mothurOut("The default value for groups is all the groups in your groupfile.\n");
288                 m->mothurOut("The default value for calc is jclass and thetayc.\n");
289                 validCalculator->printCalc("matrix", cout);
290                 m->mothurOut("The dist.shared command outputs a .dist file for each calculator you specify at each distance you choose.\n");
291                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
292         }
293         catch(exception& e) {
294                 m->errorOut(e, "MatrixOutputCommand", "help");
295                 exit(1);
296         }
297 }
298
299
300 //**********************************************************************************************************************
301
302 MatrixOutputCommand::~MatrixOutputCommand(){
303         if (abort == false) {
304                 delete input; globaldata->ginput = NULL;
305                 delete read;
306                 delete validCalculator;
307         }
308 }
309
310 //**********************************************************************************************************************
311
312 int MatrixOutputCommand::execute(){
313         try {
314                 
315                 if (abort == true) {    return 0;       }
316                         
317                 //if the users entered no valid calculators don't execute command
318                 if (matrixCalculators.size() == 0) { m->mothurOut("No valid calculators."); m->mothurOutEndLine();  return 0; }
319
320                 //you have groups
321                 read = new ReadOTUFile(globaldata->inputFileName);      
322                 read->read(&*globaldata); 
323                         
324                 input = globaldata->ginput;
325                 lookup = input->getSharedRAbundVectors();
326                 string lastLabel = lookup[0]->getLabel();
327                 
328                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
329                 set<string> processedLabels;
330                 set<string> userLabels = labels;
331                                         
332                 if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); return 0;}
333                 
334                 numGroups = lookup.size();
335                 
336                 if (m->control_pressed) { delete read; delete input; globaldata->ginput = NULL; for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } globaldata->Groups.clear(); return 0;  }
337                                 
338                 //as long as you are not at the end of the file or done wih the lines you want
339                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
340                 
341                         if (m->control_pressed) { outputTypes.clear(); delete read; delete input; globaldata->ginput = NULL; for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } globaldata->Groups.clear(); return 0;  }
342                 
343                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
344                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
345                                 process(lookup);
346                                 
347                                 processedLabels.insert(lookup[0]->getLabel());
348                                 userLabels.erase(lookup[0]->getLabel());
349                         }
350                         
351                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
352                                 string saveLabel = lookup[0]->getLabel();
353                                 
354                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
355                                 lookup = input->getSharedRAbundVectors(lastLabel);
356
357                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
358                                 process(lookup);
359                                 
360                                 processedLabels.insert(lookup[0]->getLabel());
361                                 userLabels.erase(lookup[0]->getLabel());
362                                 
363                                 //restore real lastlabel to save below
364                                 lookup[0]->setLabel(saveLabel);
365                         }
366
367                         lastLabel = lookup[0]->getLabel();                      
368                         
369                         //get next line to process
370                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
371                         lookup = input->getSharedRAbundVectors();
372                 }
373                 
374                 if (m->control_pressed) { outputTypes.clear(); delete read; delete input; globaldata->ginput = NULL; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } globaldata->Groups.clear(); return 0;  }
375
376                 //output error messages about any remaining user labels
377                 set<string>::iterator it;
378                 bool needToRun = false;
379                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
380                         m->mothurOut("Your file does not include the label " + *it);  
381                         if (processedLabels.count(lastLabel) != 1) {
382                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
383                                 needToRun = true;
384                         }else {
385                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
386                         }
387                 }
388                 
389                 if (m->control_pressed) { outputTypes.clear(); delete read; delete input; globaldata->ginput = NULL;  for (int i = 0; i < outputNames.size(); i++) {    remove(outputNames[i].c_str()); } globaldata->Groups.clear(); return 0;  }
390
391                 //run last label if you need to
392                 if (needToRun == true)  {
393                         for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {  delete lookup[i]; }  } 
394                         lookup = input->getSharedRAbundVectors(lastLabel);
395
396                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
397                         process(lookup);
398                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
399                 }
400                 
401                 if (m->control_pressed) { outputTypes.clear();  delete read; delete input; globaldata->ginput = NULL;  for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str()); } globaldata->Groups.clear(); return 0;  }
402                 
403                 //reset groups parameter
404                 globaldata->Groups.clear();  
405                 
406                 m->mothurOutEndLine();
407                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
408                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
409                 m->mothurOutEndLine();
410
411
412                 return 0;
413         }
414         catch(exception& e) {
415                 m->errorOut(e, "MatrixOutputCommand", "execute");
416                 exit(1);
417         }
418 }
419 /***********************************************************/
420 void MatrixOutputCommand::printSims(ostream& out) {
421         try {
422                 
423                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
424                 
425                 //output num seqs
426                 out << simMatrix.size() << endl;
427                 
428                 if (output == "lt") {
429                         for (int m = 0; m < simMatrix.size(); m++)      {
430                                 out << lookup[m]->getGroup() << '\t';
431                                 for (int n = 0; n < m; n++)     {
432                                         out << simMatrix[m][n] << '\t'; 
433                                 }
434                                 out << endl;
435                         }
436                 }else{
437                         for (int m = 0; m < simMatrix.size(); m++)      {
438                                 out << lookup[m]->getGroup() << '\t';
439                                 for (int n = 0; n < simMatrix[m].size(); n++)   {
440                                         out << simMatrix[m][n] << '\t'; 
441                                 }
442                                 out << endl;
443                         }
444                 }
445         }
446         catch(exception& e) {
447                 m->errorOut(e, "MatrixOutputCommand", "printSims");
448                 exit(1);
449         }
450 }
451 /***********************************************************/
452 int MatrixOutputCommand::process(vector<SharedRAbundVector*> thisLookup){
453         try {
454         
455                                 EstOutput data;
456                                 vector<SharedRAbundVector*> subset;
457
458                                 //for each calculator                                                                                           
459                                 for(int i = 0 ; i < matrixCalculators.size(); i++) {
460                                         
461                                         //initialize simMatrix
462                                         simMatrix.clear();
463                                         simMatrix.resize(numGroups);
464                                         for (int p = 0; p < simMatrix.size(); p++)      {
465                                                 for (int j = 0; j < simMatrix.size(); j++)      {
466                                                         simMatrix[p].push_back(0.0);
467                                                 }
468                                         }
469                                 
470                                         for (int k = 0; k < thisLookup.size(); k++) { 
471                                                 for (int l = k; l < thisLookup.size(); l++) {
472                                                         if (k != l) { //we dont need to similiarity of a groups to itself
473                                                                 //get estimated similarity between 2 groups
474                                                                 
475                                                                 if (m->control_pressed) { return 0; }
476                                                                 
477                                                                 subset.clear(); //clear out old pair of sharedrabunds
478                                                                 //add new pair of sharedrabunds
479                                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
480                                                                 
481                                                                 //if this calc needs all groups to calculate the pair load all groups
482                                                                 if (matrixCalculators[i]->getNeedsAll()) { 
483                                                                         //load subset with rest of lookup for those calcs that need everyone to calc for a pair
484                                                                         for (int w = 0; w < thisLookup.size(); w++) {
485                                                                                 if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
486                                                                         }
487                                                                 }
488                                                                 
489                                                                 data = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
490                                                                 //save values in similarity matrix
491                                                                 simMatrix[k][l] = 1.0 - data[0];  //convert similiarity to distance
492                                                                 simMatrix[l][k] = 1.0 - data[0];  //convert similiarity to distance
493                                                         }
494                                                 }
495                                         }
496                                         
497                                         exportFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".dist";
498                                         m->openOutputFile(exportFileName, out);
499                                         outputNames.push_back(exportFileName); outputTypes["phylip"].push_back(exportFileName);
500                                         
501                                         printSims(out);
502                                         out.close();
503                                         
504                                 }
505
506                                 return 0;
507                 
508         }
509         catch(exception& e) {
510                 m->errorOut(e, "MatrixOutputCommand", "process");
511                 exit(1);
512         }
513 }
514 /***********************************************************/
515
516
517