]> git.donarmstrong.com Git - mothur.git/blob - matrixoutputcommand.cpp
added warning about merging with something above cutoff to cluster. working on chimeras
[mothur.git] / matrixoutputcommand.cpp
1 /*
2  *  matrixoutputcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/20/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "matrixoutputcommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
21
22
23 //**********************************************************************************************************************
24
25 MatrixOutputCommand::MatrixOutputCommand(string option){
26         try {
27                 globaldata = GlobalData::getInstance();
28                 abort = false;
29                 allLines = 1;
30                 labels.clear();
31                 Groups.clear();
32                 Estimators.clear();
33                 
34                 //allow user to run help
35                 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
36                 
37                 else {
38                         //valid paramters for this command
39                         string Array[] =  {"label","calc","groups","outputdir","inputdir", "output"};
40                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
41                         
42                         OptionParser parser(option);
43                         map<string,string> parameters  = parser.getParameters();
44                         
45                         ValidParameters validParameter;
46                 
47                         //check to make sure all parameters are valid for command
48                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
49                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
50                         }
51                         
52                         //if the user changes the output directory command factory will send this info to us in the output parameter 
53                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
54                                 outputDir = ""; 
55                                 outputDir += hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it  
56                         }
57                         
58                         //make sure the user has already run the read.otu command
59                         if (globaldata->getSharedFile() == "") {
60                                 if (globaldata->getListFile() == "") { mothurOut("You must read a list and a group, or a shared before you can use the dist.shared command."); mothurOutEndLine(); abort = true; }
61                                 else if (globaldata->getGroupFile() == "") { mothurOut("You must read a list and a group, or a shared before you can use the dist.shared command."); mothurOutEndLine(); abort = true; }
62                         }
63                         
64                         //check for optional parameter and set defaults
65                         // ...at some point should added some additional type checking...
66                         label = validParameter.validFile(parameters, "label", false);                   
67                         if (label == "not found") { label = ""; }
68                         else { 
69                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
70                                 else { allLines = 1;  }
71                         }
72                         
73                         output = validParameter.validFile(parameters, "output", false);         if(output == "not found"){      output = "lt"; }
74                         if ((output != "lt") && (output != "square")) { mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); mothurOutEndLine(); output = "lt"; }
75                         
76                         //if the user has not specified any labels use the ones from read.otu
77                         if (label == "") {  
78                                 allLines = globaldata->allLines; 
79                                 labels = globaldata->labels; 
80                         }
81                                 
82                         groups = validParameter.validFile(parameters, "groups", false);                 
83                         if (groups == "not found") { groups = ""; }
84                         else { 
85                                 splitAtDash(groups, Groups);
86                                 globaldata->Groups = Groups;
87                         }
88                                 
89                         calc = validParameter.validFile(parameters, "calc", false);                     
90                         if (calc == "not found") { calc = "jclass-thetayc";  }
91                         else { 
92                                  if (calc == "default")  {  calc = "jclass-thetayc";  }
93                         }
94                         splitAtDash(calc, Estimators);
95
96                         if (abort == false) {
97                         
98                                 validCalculator = new ValidCalculators();
99                                 
100                                 int i;
101                                 for (i=0; i<Estimators.size(); i++) {
102                                         if (validCalculator->isValidCalculator("matrix", Estimators[i]) == true) { 
103                                                 if (Estimators[i] == "jabund") {        
104                                                         matrixCalculators.push_back(new JAbund());
105                                                 }else if (Estimators[i] == "sorabund") { 
106                                                         matrixCalculators.push_back(new SorAbund());
107                                                 }else if (Estimators[i] == "jclass") { 
108                                                         matrixCalculators.push_back(new Jclass());
109                                                 }else if (Estimators[i] == "sorclass") { 
110                                                         matrixCalculators.push_back(new SorClass());
111                                                 }else if (Estimators[i] == "jest") { 
112                                                         matrixCalculators.push_back(new Jest());
113                                                 }else if (Estimators[i] == "sorest") { 
114                                                         matrixCalculators.push_back(new SorEst());
115                                                 }else if (Estimators[i] == "thetayc") { 
116                                                         matrixCalculators.push_back(new ThetaYC());
117                                                 }else if (Estimators[i] == "thetan") { 
118                                                         matrixCalculators.push_back(new ThetaN());
119                                                 }else if (Estimators[i] == "morisitahorn") { 
120                                                         matrixCalculators.push_back(new MorHorn());
121                                                 }else if (Estimators[i] == "braycurtis") { 
122                                                         matrixCalculators.push_back(new BrayCurtis());
123                                                 }
124                                         }
125                                 }
126                                 
127                         }
128                 }
129                 
130         }
131         catch(exception& e) {
132                 errorOut(e, "MatrixOutputCommand", "MatrixOutputCommand");
133                 exit(1);
134         }
135 }
136
137 //**********************************************************************************************************************
138
139 void MatrixOutputCommand::help(){
140         try {
141                 mothurOut("The dist.shared command can only be executed after a successful read.otu command.\n");
142                 mothurOut("The dist.shared command parameters are groups, calc, output and label.  No parameters are required.\n");
143                 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n");
144                 mothurOut("The group names are separated by dashes. The label parameter allows you to select what distance levels you would like distance matrices created for, and is also separated by dashes.\n");
145                 mothurOut("The dist.shared command should be in the following format: dist.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels).\n");
146                 mothurOut("The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n");
147                 mothurOut("Example dist.shared(groups=A-B-C, calc=jabund-sorabund).\n");
148                 mothurOut("The default value for groups is all the groups in your groupfile.\n");
149                 mothurOut("The default value for calc is jclass and thetayc.\n");
150                 validCalculator->printCalc("matrix", cout);
151                 mothurOut("The dist.shared command outputs a .dist file for each calculator you specify at each distance you choose.\n");
152                 mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
153         }
154         catch(exception& e) {
155                 errorOut(e, "MatrixOutputCommand", "help");
156                 exit(1);
157         }
158 }
159
160
161 //**********************************************************************************************************************
162
163 MatrixOutputCommand::~MatrixOutputCommand(){
164         if (abort == false) {
165                 delete input; globaldata->ginput = NULL;
166                 delete read;
167                 delete validCalculator;
168         }
169 }
170
171 //**********************************************************************************************************************
172
173 int MatrixOutputCommand::execute(){
174         try {
175                 
176                 if (abort == true) {    return 0;       }
177                         
178                 //if the users entered no valid calculators don't execute command
179                 if (matrixCalculators.size() == 0) { mothurOut("No valid calculators."); mothurOutEndLine();  return 0; }
180
181                 //you have groups
182                 read = new ReadOTUFile(globaldata->inputFileName);      
183                 read->read(&*globaldata); 
184                         
185                 input = globaldata->ginput;
186                 lookup = input->getSharedRAbundVectors();
187                 string lastLabel = lookup[0]->getLabel();
188                 
189                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
190                 set<string> processedLabels;
191                 set<string> userLabels = labels;
192                                         
193                 if (lookup.size() < 2) { mothurOut("You have not provided enough valid groups.  I cannot run the command."); mothurOutEndLine(); return 0;}
194                 
195                 numGroups = lookup.size();
196                                 
197                 //as long as you are not at the end of the file or done wih the lines you want
198                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
199                 
200                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
201                                 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
202                                 process(lookup);
203                                 
204                                 processedLabels.insert(lookup[0]->getLabel());
205                                 userLabels.erase(lookup[0]->getLabel());
206                         }
207                         
208                         if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
209                                 string saveLabel = lookup[0]->getLabel();
210                                 
211                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
212                                 lookup = input->getSharedRAbundVectors(lastLabel);
213
214                                 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
215                                 process(lookup);
216                                 
217                                 processedLabels.insert(lookup[0]->getLabel());
218                                 userLabels.erase(lookup[0]->getLabel());
219                                 
220                                 //restore real lastlabel to save below
221                                 lookup[0]->setLabel(saveLabel);
222                         }
223
224                         lastLabel = lookup[0]->getLabel();                      
225                         
226                         //get next line to process
227                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
228                         lookup = input->getSharedRAbundVectors();
229                 }
230                 
231                 //output error messages about any remaining user labels
232                 set<string>::iterator it;
233                 bool needToRun = false;
234                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
235                         mothurOut("Your file does not include the label " + *it);  
236                         if (processedLabels.count(lastLabel) != 1) {
237                                 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
238                                 needToRun = true;
239                         }else {
240                                 mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
241                         }
242                 }
243                 
244                 //run last label if you need to
245                 if (needToRun == true)  {
246                         for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {  delete lookup[i]; }  } 
247                         lookup = input->getSharedRAbundVectors(lastLabel);
248
249                         mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
250                         process(lookup);
251                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
252                 }
253                 
254                                 
255                 //reset groups parameter
256                 globaldata->Groups.clear();  
257
258                 return 0;
259         }
260         catch(exception& e) {
261                 errorOut(e, "MatrixOutputCommand", "execute");
262                 exit(1);
263         }
264 }
265 /***********************************************************/
266 void MatrixOutputCommand::printSims(ostream& out) {
267         try {
268                 
269                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
270                 
271                 //output num seqs
272                 out << simMatrix.size() << endl;
273                 
274                 if (output == "lt") {
275                         for (int m = 0; m < simMatrix.size(); m++)      {
276                                 out << lookup[m]->getGroup() << '\t';
277                                 for (int n = 0; n < m; n++)     {
278                                         out << simMatrix[m][n] << '\t'; 
279                                 }
280                                 out << endl;
281                         }
282                 }else{
283                         for (int m = 0; m < simMatrix.size(); m++)      {
284                                 out << lookup[m]->getGroup() << '\t';
285                                 for (int n = 0; n < simMatrix[m].size(); n++)   {
286                                         out << simMatrix[m][n] << '\t'; 
287                                 }
288                                 out << endl;
289                         }
290                 }
291         }
292         catch(exception& e) {
293                 errorOut(e, "MatrixOutputCommand", "printSims");
294                 exit(1);
295         }
296 }
297 /***********************************************************/
298 void MatrixOutputCommand::process(vector<SharedRAbundVector*> thisLookup){
299         try {
300         
301                                 EstOutput data;
302                                 vector<SharedRAbundVector*> subset;
303
304                                 //for each calculator                                                                                           
305                                 for(int i = 0 ; i < matrixCalculators.size(); i++) {
306                                         
307                                         //initialize simMatrix
308                                         simMatrix.clear();
309                                         simMatrix.resize(numGroups);
310                                         for (int m = 0; m < simMatrix.size(); m++)      {
311                                                 for (int j = 0; j < simMatrix.size(); j++)      {
312                                                         simMatrix[m].push_back(0.0);
313                                                 }
314                                         }
315                                 
316                                         for (int k = 0; k < thisLookup.size(); k++) { 
317                                                 for (int l = k; l < thisLookup.size(); l++) {
318                                                         if (k != l) { //we dont need to similiarity of a groups to itself
319                                                                 //get estimated similarity between 2 groups
320                                                                 
321                                                                 subset.clear(); //clear out old pair of sharedrabunds
322                                                                 //add new pair of sharedrabunds
323                                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
324                                                                 
325                                                                 data = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
326                                                                 //save values in similarity matrix
327                                                                 simMatrix[k][l] = 1.0 - data[0];  //convert similiarity to distance
328                                                                 simMatrix[l][k] = 1.0 - data[0];  //convert similiarity to distance
329                                                         }
330                                                 }
331                                         }
332                                         
333                                         exportFileName = outputDir + getRootName(getSimpleName(globaldata->inputFileName)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".dist";
334                                         openOutputFile(exportFileName, out);
335                                         printSims(out);
336                                         out.close();
337                                         
338                                 }
339
340         
341                 
342         }
343         catch(exception& e) {
344                 errorOut(e, "MatrixOutputCommand", "process");
345                 exit(1);
346         }
347 }
348 /***********************************************************/
349
350
351