]> git.donarmstrong.com Git - mothur.git/blob - subsamplecommand.cpp
added cluster.classic command
[mothur.git] / subsamplecommand.cpp
1 /*
2  *  subsamplecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/27/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "subsamplecommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> SubSampleCommand::getValidParameters(){  
14         try {
15                 string Array[] =  {"groups","label","outputdir","inputdir"};
16                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "SubSampleCommand", "getValidParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 SubSampleCommand::SubSampleCommand(){   
26         try {
27                 abort = true;
28                 //initialize outputTypes
29                 vector<string> tempOutNames;
30                 outputTypes["shared"] = tempOutNames;
31                 outputTypes["list"] = tempOutNames;
32                 outputTypes["rabund"] = tempOutNames;
33                 outputTypes["sabund"] = tempOutNames;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "SubSampleCommand", "GetRelAbundCommand");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 vector<string> SubSampleCommand::getRequiredParameters(){       
42         try {
43                 vector<string> myArray;
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "SubSampleCommand", "getRequiredParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 vector<string> SubSampleCommand::getRequiredFiles(){    
53         try {
54                 string Array[] =  {"shared","list","rabund","sabund","or"};
55                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "SubSampleCommand", "getRequiredFiles");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64 SubSampleCommand::SubSampleCommand(string option) {
65         try {
66                 globaldata = GlobalData::getInstance();
67                 abort = false;
68                 allLines = 1;
69                 labels.clear();
70                 
71                 //allow user to run help
72                 if(option == "help") { help(); abort = true; }
73                 
74                 else {
75                         //valid paramters for this command
76                         string AlignArray[] =  {"groups","label","outputdir","inputdir"};
77                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
78                         
79                         OptionParser parser(option);
80                         map<string,string> parameters = parser.getParameters();
81                         
82                         ValidParameters validParameter;
83                         
84                         //check to make sure all parameters are valid for command
85                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
86                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
87                         }
88                         
89                         //initialize outputTypes
90                         vector<string> tempOutNames;
91                         outputTypes["shared"] = tempOutNames;
92                         outputTypes["list"] = tempOutNames;
93                         outputTypes["rabund"] = tempOutNames;
94                         outputTypes["sabund"] = tempOutNames;
95                                         
96                         //if the user changes the output directory command factory will send this info to us in the output parameter 
97                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
98                                 outputDir = ""; 
99                                 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it       
100                         }
101                         
102                         //make sure the user has already run the read.otu command
103                         if ((globaldata->getSharedFile() == "") && (globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { m->mothurOut("You must read a list, sabund, rabund or shared file before you can use the sub.sample command."); m->mothurOutEndLine(); abort = true; }
104
105                         //check for optional parameter and set defaults
106                         // ...at some point should added some additional type checking...
107                         label = validParameter.validFile(parameters, "label", false);                   
108                         if (label == "not found") { label = ""; }
109                         else { 
110                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
111                                 else { allLines = 1;  }
112                         }
113                         
114                         //if the user has not specified any labels use the ones from read.otu
115                         if (label == "") {  
116                                 allLines = globaldata->allLines; 
117                                 labels = globaldata->labels; 
118                         }
119                         
120                         groups = validParameter.validFile(parameters, "groups", false);                 
121                         if (groups == "not found") { groups = ""; pickedGroups = false; }
122                         else { 
123                                 pickedGroups = true;
124                                 m->splitAtDash(groups, Groups);
125                                 globaldata->Groups = Groups;
126                         }
127                         
128                 }
129
130         }
131         catch(exception& e) {
132                 m->errorOut(e, "SubSampleCommand", "SubSampleCommand");
133                 exit(1);
134         }
135 }
136
137 //**********************************************************************************************************************
138
139 void SubSampleCommand::help(){
140         try {
141                 m->mothurOut("The get.relabund command can only be executed after a successful read.otu command of a list and group or shared file.\n");
142                 m->mothurOut("The get.relabund command parameters are groups, scale and label.  No parameters are required.\n");
143                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
144                 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
145                 m->mothurOut("The scale parameter allows you to select what scale you would like to use. Choices are totalgroup, totalotu, averagegroup, averageotu, default is totalgroup.\n");
146                 m->mothurOut("The get.relabund command should be in the following format: get.relabund(groups=yourGroups, label=yourLabels).\n");
147                 m->mothurOut("Example get.relabund(groups=A-B-C, scale=averagegroup).\n");
148                 m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
149                 m->mothurOut("The get.relabund command outputs a .relabund file.\n");
150                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
151
152         }
153         catch(exception& e) {
154                 m->errorOut(e, "SubSampleCommand", "help");
155                 exit(1);
156         }
157 }
158
159 //**********************************************************************************************************************
160
161 SubSampleCommand::~SubSampleCommand(){
162 }
163
164 //**********************************************************************************************************************
165
166 int SubSampleCommand::execute(){
167         try {
168         
169                 if (abort == true) { return 0; }
170                 
171                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "subsample" +  m->getExtension(globaldata->inputFileName);
172                 ofstream out;
173                 m->openOutputFile(outputFileName, out);
174                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
175                 
176                 string format = globaldata->getFormat();
177                 
178                 read = new ReadOTUFile(globaldata->inputFileName);      
179                 read->read(&*globaldata); 
180                 input = globaldata->ginput;
181                 
182                 if (format == "sharedfile") {
183                         lookup = input->getSharedRAbundVectors();
184                         outputTypes["shared"].push_back(outputFileName);
185                         getSubSampleShared(lookup, out);
186                 }else if (format == "list") { 
187                         list = globaldata->glist;
188                         outputTypes["list"].push_back(outputFileName);
189                         //getSubSamplesList();
190                 }else if (format == "rabund") { 
191                         rabund = globaldata->rabund;
192                         outputTypes["rabund"].push_back(outputFileName);
193                         //getSubSamplesRabund();
194                 
195                 }else if (format == "sabund") { 
196                         sabund = globaldata->sabund;
197                         outputTypes["sabund"].push_back(outputFileName);
198                         //getSubSamplesSabund();
199                 }
200                 
201                 out.close();
202                                         
203                 //reset groups parameter
204                 delete input; globaldata->ginput = NULL;
205                 delete read;
206                 
207                 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); return 0;}
208                 
209                 m->mothurOutEndLine();
210                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
211                 m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); 
212                 m->mothurOutEndLine();
213                 
214                 return 0;
215         }
216         catch(exception& e) {
217                 m->errorOut(e, "SubSampleCommand", "execute");
218                 exit(1);
219         }
220 }
221 //**********************************************************************************************************************
222 int SubSampleCommand::getSubSampleShared(vector<SharedRAbundVector*>& thislookup, ofstream& filename) {
223         try {
224         
225                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
226                 set<string> processedLabels;
227                 set<string> userLabels = labels;
228
229                 string lastLabel = lookup[0]->getLabel();
230         
231                 //as long as you are not at the end of the file or done wih the lines you want
232                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
233                         if (m->control_pressed) {  return 0;  }
234         
235                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
236
237                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
238                                 
239                                 //process lookup
240                                 
241                                                                 
242                                                                                                                                 
243                                 processedLabels.insert(lookup[0]->getLabel());
244                                 userLabels.erase(lookup[0]->getLabel());
245                         }
246                         
247                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
248                                 string saveLabel = lookup[0]->getLabel();
249                 
250                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
251                 
252                                 lookup = input->getSharedRAbundVectors(lastLabel);
253                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
254                                 
255                                 //process lookup                                
256                                 processedLabels.insert(lookup[0]->getLabel());
257                                 userLabels.erase(lookup[0]->getLabel());
258                                 
259                                 //restore real lastlabel to save below
260                                 lookup[0]->setLabel(saveLabel);
261                         }
262                         
263                         lastLabel = lookup[0]->getLabel();
264                         //prevent memory leak
265                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
266                                                 
267                         //get next line to process
268                         lookup = input->getSharedRAbundVectors();                               
269                 }
270                 
271                 
272                 if (m->control_pressed) {  return 0;  }
273
274                 //output error messages about any remaining user labels
275                 set<string>::iterator it;
276                 bool needToRun = false;
277                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
278                         m->mothurOut("Your file does not include the label " + *it); 
279                         if (processedLabels.count(lastLabel) != 1) {
280                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
281                                 needToRun = true;
282                         }else {
283                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
284                         }
285                 }
286         
287                 //run last label if you need to
288                 if (needToRun == true)  {
289                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
290                         lookup = input->getSharedRAbundVectors(lastLabel);
291                         
292                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
293                         
294                         //process lookup
295                         
296                         
297                         
298                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
299                 }
300         
301                 //reset groups parameter
302                 globaldata->Groups.clear();  
303                                 
304                 return 0;
305  
306         }
307         catch(exception& e) {
308                 m->errorOut(e, "SubSampleCommand", "getSubSampleShared");
309                 exit(1);
310         }
311 }
312
313
314 //**********************************************************************************************************************
315 int SubSampleCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
316         try {
317                 
318                 vector<SharedRAbundVector*> newLookup;
319                 for (int i = 0; i < thislookup.size(); i++) {
320                         SharedRAbundVector* temp = new SharedRAbundVector();
321                         temp->setLabel(thislookup[i]->getLabel());
322                         temp->setGroup(thislookup[i]->getGroup());
323                         newLookup.push_back(temp);
324                 }
325                 
326                 //for each bin
327                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
328                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
329                 
330                         //look at each sharedRabund and make sure they are not all zero
331                         bool allZero = true;
332                         for (int j = 0; j < thislookup.size(); j++) {
333                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
334                         }
335                         
336                         //if they are not all zero add this bin
337                         if (!allZero) {
338                                 for (int j = 0; j < thislookup.size(); j++) {
339                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
340                                 }
341                         }
342                 }
343
344                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
345
346                 thislookup = newLookup;
347                 
348                 return 0;
349  
350         }
351         catch(exception& e) {
352                 m->errorOut(e, "SubSampleCommand", "eliminateZeroOTUS");
353                 exit(1);
354         }
355 }
356
357 //**********************************************************************************************************************
358
359