]> git.donarmstrong.com Git - mothur.git/blob - matrixoutputcommand.cpp
merged pat's trim seqs edits with sarah's major overhaul of global data; also added...
[mothur.git] / matrixoutputcommand.cpp
1 /*
2  *  matrixoutputcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/20/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "matrixoutputcommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
21
22
23 //**********************************************************************************************************************
24
25 MatrixOutputCommand::MatrixOutputCommand(string option){
26         try {
27                 globaldata = GlobalData::getInstance();
28                 abort = false;
29                 allLines = 1;
30                 lines.clear();
31                 labels.clear();
32                 Groups.clear();
33                 Estimators.clear();
34                 
35                 //allow user to run help
36                 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
37                 
38                 else {
39                         //valid paramters for this command
40                         string Array[] =  {"line","label","calc","groups"};
41                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
42                         
43                         OptionParser parser(option);
44                         map<string,string> parameters  = parser.getParameters();
45                         
46                         ValidParameters validParameter;
47                 
48                         //check to make sure all parameters are valid for command
49                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
50                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
51                         }
52                         
53                         //make sure the user has already run the read.otu command
54                         if (globaldata->getSharedFile() == "") {
55                                 if (globaldata->getListFile() == "") { cout << "You must read a list and a group, or a shared before you can use the dist.shared command." << endl; abort = true; }
56                                 else if (globaldata->getGroupFile() == "") { cout << "You must read a list and a group, or a shared before you can use the dist.shared command." << endl; abort = true; }
57                         }
58                         
59                         //check for optional parameter and set defaults
60                         // ...at some point should added some additional type checking...
61                         line = validParameter.validFile(parameters, "line", false);                             
62                         if (line == "not found") { line = "";  }
63                         else { 
64                                 if(line != "all") {  splitAtDash(line, lines);  allLines = 0;  }
65                                 else { allLines = 1;  }
66                         }
67                         
68                         label = validParameter.validFile(parameters, "label", false);                   
69                         if (label == "not found") { label = ""; }
70                         else { 
71                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
72                                 else { allLines = 1;  }
73                         }
74                         
75                         //make sure user did not use both the line and label parameters
76                         if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. " << endl; abort = true; }
77                         //if the user has not specified any line or labels use the ones from read.otu
78                         else if((line == "") && (label == "")) {  
79                                 allLines = globaldata->allLines; 
80                                 labels = globaldata->labels; 
81                                 lines = globaldata->lines;
82                         }
83                                 
84                         groups = validParameter.validFile(parameters, "groups", false);                 
85                         if (groups == "not found") { groups = ""; }
86                         else { 
87                                 splitAtDash(groups, Groups);
88                                 globaldata->Groups = Groups;
89                         }
90                                 
91                         calc = validParameter.validFile(parameters, "calc", false);                     
92                         if (calc == "not found") { calc = "jclass-thetayc";  }
93                         else { 
94                                  if (calc == "default")  {  calc = "jclass-thetayc";  }
95                         }
96                         splitAtDash(calc, Estimators);
97
98                         if (abort == false) {
99                         
100                                 validCalculator = new ValidCalculators();
101                                 
102                                 int i;
103                                 for (i=0; i<Estimators.size(); i++) {
104                                         if (validCalculator->isValidCalculator("matrix", Estimators[i]) == true) { 
105                                                 if (Estimators[i] == "jabund") {        
106                                                         matrixCalculators.push_back(new JAbund());
107                                                 }else if (Estimators[i] == "sorabund") { 
108                                                         matrixCalculators.push_back(new SorAbund());
109                                                 }else if (Estimators[i] == "jclass") { 
110                                                         matrixCalculators.push_back(new Jclass());
111                                                 }else if (Estimators[i] == "sorclass") { 
112                                                         matrixCalculators.push_back(new SorClass());
113                                                 }else if (Estimators[i] == "jest") { 
114                                                         matrixCalculators.push_back(new Jest());
115                                                 }else if (Estimators[i] == "sorest") { 
116                                                         matrixCalculators.push_back(new SorEst());
117                                                 }else if (Estimators[i] == "thetayc") { 
118                                                         matrixCalculators.push_back(new ThetaYC());
119                                                 }else if (Estimators[i] == "thetan") { 
120                                                         matrixCalculators.push_back(new ThetaN());
121                                                 }else if (Estimators[i] == "morisitahorn") { 
122                                                         matrixCalculators.push_back(new MorHorn());
123                                                 }else if (Estimators[i] == "braycurtis") { 
124                                                         matrixCalculators.push_back(new BrayCurtis());
125                                                 }
126                                         }
127                                 }
128                                 
129                         }
130                 }
131                 
132         }
133         catch(exception& e) {
134                 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function MatrixOutputCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
135                 exit(1);
136         }
137         catch(...) {
138                 cout << "An unknown error has occurred in the MatrixOutputCommand class function MatrixOutputCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
139                 exit(1);
140         }       
141 }
142
143 //**********************************************************************************************************************
144
145 void MatrixOutputCommand::help(){
146         try {
147                 cout << "The dist.shared command can only be executed after a successful read.otu command." << "\n";
148                 cout << "The dist.shared command parameters are groups, calc, line and label.  The calc parameter is required, and you may not use line and label at the same time." << "\n";
149                 cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included used." << "\n";
150                 cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like distance matrices created for, and are also separated by dashes." << "\n";
151                 cout << "The dist.shared command should be in the following format: dist.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels)." << "\n";
152                 cout << "Example dist.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund)." << "\n";
153                 cout << "The default value for groups is all the groups in your groupfile." << "\n";
154                 cout << "The default value for calc is jclass and thetayc." << "\n";
155                 validCalculator->printCalc("matrix", cout);
156                 cout << "The dist.shared command outputs a .dist file for each calculator you specify at each distance you choose." << "\n";
157                 cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n";
158         }
159         catch(exception& e) {
160                 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
161                 exit(1);
162         }
163         catch(...) {
164                 cout << "An unknown error has occurred in the MatrixOutputCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
165                 exit(1);
166         }       
167 }
168
169
170 //**********************************************************************************************************************
171
172 MatrixOutputCommand::~MatrixOutputCommand(){
173         delete input;
174         delete read;
175         delete validCalculator;
176 }
177
178 //**********************************************************************************************************************
179
180 int MatrixOutputCommand::execute(){
181         try {
182                 
183                 if (abort == true) {    return 0;       }
184         
185                 int count = 1;  
186                                 
187                 //if the users entered no valid calculators don't execute command
188                 if (matrixCalculators.size() == 0) { cout << "No valid calculators." << endl; return 0; }
189
190                 //you have groups
191                 read = new ReadOTUFile(globaldata->inputFileName);      
192                 read->read(&*globaldata); 
193                         
194                 input = globaldata->ginput;
195                 lookup = input->getSharedRAbundVectors();
196                 vector<SharedRAbundVector*> lastLookup = lookup;
197                 
198                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
199                 set<string> processedLabels;
200                 set<string> userLabels = labels;
201                 set<int> userLines = lines;
202                                 
203                 if (lookup.size() < 2) { cout << "You have not provided enough valid groups.  I cannot run the command." << endl; return 0;}
204                 
205                 numGroups = lookup.size();
206                                 
207                 //as long as you are not at the end of the file or done wih the lines you want
208                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
209                 
210                         if(allLines == 1 || lines.count(count) == 1 || labels.count(lookup[0]->getLabel()) == 1){                       
211                                 cout << lookup[0]->getLabel() << '\t' << count << endl;
212                                 process(lookup);
213                                 
214                                 processedLabels.insert(lookup[0]->getLabel());
215                                 userLabels.erase(lookup[0]->getLabel());
216                                 userLines.erase(count);
217                         }
218                         
219                         if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLookup[0]->getLabel()) != 1)) {
220                                 cout << lastLookup[0]->getLabel() << '\t' << count << endl;
221                                 process(lastLookup);
222                                 
223                                 processedLabels.insert(lastLookup[0]->getLabel());
224                                 userLabels.erase(lastLookup[0]->getLabel());
225                         }
226
227                         //prevent memory leak
228                         if (count != 1) { for (int i = 0; i < lastLookup.size(); i++) {  delete lastLookup[i];  } }
229                         lastLookup = lookup;                    
230                         
231                         //get next line to process
232                         lookup = input->getSharedRAbundVectors();
233                         count++;
234                 }
235                 
236                 //output error messages about any remaining user labels
237                 set<string>::iterator it;
238                 bool needToRun = false;
239                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
240                         cout << "Your file does not include the label "<< *it; 
241                         if (processedLabels.count(lastLookup[0]->getLabel()) != 1) {
242                                 cout << ". I will use " << lastLookup[0]->getLabel() << "." << endl;
243                                 needToRun = true;
244                         }else {
245                                 cout << ". Please refer to " << lastLookup[0]->getLabel() << "." << endl;
246                         }
247                 }
248                 
249                 //run last line if you need to
250                 if (needToRun == true)  {
251                         cout << lastLookup[0]->getLabel() << '\t' << count << endl;
252                         process(lastLookup);            
253                 }
254                 
255                 for (int i = 0; i < lastLookup.size(); i++) {  delete lastLookup[i];  }
256
257                 
258                 //reset groups parameter
259                 globaldata->Groups.clear();  
260
261                 return 0;
262         }
263         catch(exception& e) {
264                 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
265                 exit(1);
266         }
267         catch(...) {
268                 cout << "An unknown error has occurred in the MatrixOutputCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
269                 exit(1);
270         }               
271 }
272 /***********************************************************/
273 void MatrixOutputCommand::printSims(ostream& out) {
274         try {
275                 
276                 //output column headers
277                 out << simMatrix.size() << endl;
278                 
279                 for (int m = 0; m < simMatrix.size(); m++)      {
280                         out << lookup[m]->getGroup() << '\t';
281                         for (int n = 0; n < m; n++)     {
282                                 out << simMatrix[m][n] << '\t'; 
283                         }
284                         out << endl;
285                 }
286
287         }
288         catch(exception& e) {
289                 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
290                 exit(1);
291         }
292         catch(...) {
293                 cout << "An unknown error has occurred in the MatrixOutputCommand class function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
294                 exit(1);
295         }               
296 }
297 /***********************************************************/
298 void MatrixOutputCommand::process(vector<SharedRAbundVector*> thisLookup){
299         try {
300         
301                                 EstOutput data;
302                                 vector<SharedRAbundVector*> subset;
303
304                                 //for each calculator                                                                                           
305                                 for(int i = 0 ; i < matrixCalculators.size(); i++) {
306                                         
307                                         //initialize simMatrix
308                                         simMatrix.clear();
309                                         simMatrix.resize(numGroups);
310                                         for (int m = 0; m < simMatrix.size(); m++)      {
311                                                 for (int j = 0; j < simMatrix.size(); j++)      {
312                                                         simMatrix[m].push_back(0.0);
313                                                 }
314                                         }
315                                 
316                                         for (int k = 0; k < thisLookup.size(); k++) { 
317                                                 for (int l = k; l < thisLookup.size(); l++) {
318                                                         if (k != l) { //we dont need to similiarity of a groups to itself
319                                                                 //get estimated similarity between 2 groups
320                                                                 
321                                                                 subset.clear(); //clear out old pair of sharedrabunds
322                                                                 //add new pair of sharedrabunds
323                                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
324                                                                 
325                                                                 data = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
326                                                                 //save values in similarity matrix
327                                                                 simMatrix[k][l] = 1.0 - data[0];  //convert similiarity to distance
328                                                                 simMatrix[l][k] = 1.0 - data[0];  //convert similiarity to distance
329                                                         }
330                                                 }
331                                         }
332                                         
333                                         exportFileName = getRootName(globaldata->inputFileName) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
334                                         openOutputFile(exportFileName, out);
335                                         printSims(out);
336                                         out.close();
337                                         
338                                 }
339
340         
341                 
342         }
343         catch(exception& e) {
344                 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
345                 exit(1);
346         }
347         catch(...) {
348                 cout << "An unknown error has occurred in the MatrixOutputCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
349                 exit(1);
350         }               
351 }
352 /***********************************************************/
353
354
355