]> git.donarmstrong.com Git - mothur.git/blob - filtersharedcommand.cpp
4d1c301edf9566b8101e46320e71dc84f8479b67
[mothur.git] / filtersharedcommand.cpp
1 //
2 //  filtersharedcommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 1/4/13.
6 //  Copyright (c) 2013 Schloss Lab. All rights reserved.
7 //
8
9 #include "filtersharedcommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> FilterSharedCommand::setParameters(){    
13         try {           
14                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","shared",false,true,true); parameters.push_back(pshared);
15                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
16                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
17                 CommandParameter pminpercent("minpercent", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminpercent);
18         CommandParameter pminabund("minabund", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminabund);
19         CommandParameter pmintotal("mintotal", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pmintotal);
20         CommandParameter pminnumsamples("minnumsamples", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminnumsamples);
21         CommandParameter pminpercentsamples("minpercentsamples", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminpercentsamples);
22                 CommandParameter pmakerare("makerare", "Boolean", "", "T", "", "", "","",false,false,true); parameters.push_back(pmakerare);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
25                 
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "FilterSharedCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string FilterSharedCommand::getHelpString(){    
37         try {
38                 string helpString = "";
39                 helpString += "The filter.shared command is used to remove OTUs based on various critieria.\n";
40                 helpString += "The filter.shared command parameters are shared, minpercent, minabund, mintotal, minnumsamples, minpercentsamples, makerare, groups and label.  You must provide a shared file.\n";
41                 helpString += "The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n";
42                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
43         
44                 helpString += "The minabund parameter allows you indicate the minimum abundance required for each sample in a given OTU.  If any samples abundance falls below the minimum, the OTU is removed. Default=0\n";
45         helpString += "The minpercent parameter allows you indicate the minimum relative abundance of an OTU. For example, if the OTUs total abundance across all samples is 8, and the total abundance across all OTUs is 1000, and minpercent=1. The OTU's relative abundance is 0.008, the minimum is 0.01, so the OTU will be removed. Default=0.\n";
46         helpString += "The minnumsamples parameter allows you indicate the minimum number of samples present in an OTU. If the number of samples present falls below the minimum, the OTU is removed. Default=0.\n";
47         helpString += "The minpercentsamples parameter allows you indicate the minimum percent of sample present in an OTU. For example, if the total number of samples is 10, the number present is 3, and the minpercentsamples=50. The OTU's precent of samples is 0.333, the minimum is 0.50, so the OTU will be removed. Default=0.\n";
48         helpString += "The mintotal parameter allows you indicate the minimum abundance required for a given OTU.  If abundance across all samples falls below the minimum, the OTU is removed. Default=0.\n";
49         
50                 helpString += "The makerare parameter allows you indicate you want the abundances of any removed OTUs to be saved and a new \"rare\" OTU created with its abundances equal to the sum of the OTUs removed.  This will preserve the number of reads in your dataset. Default=T\n";
51                 helpString += "The filter.shared command should be in the following format: filter.shared(shared=yourSharedFile, minabund=yourMinAbund, groups=yourGroups, label=yourLabels).\n";
52                 helpString += "Example filter.shared(shared=final.an.shared, minabund=10).\n";
53                 helpString += "The default value for groups is all the groups in your sharedfile, and all labels in your inputfile will be used.\n";
54                 helpString += "The filter.shared command outputs a .filter.shared file.\n";
55                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
56                 return helpString;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "FilterSharedCommand", "getHelpString");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64 string FilterSharedCommand::getOutputPattern(string type) {
65     try {
66         string pattern = "";
67         
68         if (type == "shared")      {   pattern = "[filename],[distance],filter,[extension]";    }
69         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
70         
71         return pattern;
72     }
73     catch(exception& e) {
74         m->errorOut(e, "FilterSharedCommand", "getOutputPattern");
75         exit(1);
76     }
77 }
78 //**********************************************************************************************************************
79 FilterSharedCommand::FilterSharedCommand(){     
80         try {
81                 abort = true; calledHelp = true; 
82                 setParameters();
83                 vector<string> tempOutNames;
84                 outputTypes["shared"] = tempOutNames;
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "FilterSharedCommand", "GetRelAbundCommand");
88                 exit(1);
89         }
90 }
91 //**********************************************************************************************************************
92 FilterSharedCommand::FilterSharedCommand(string option) {
93         try {
94                 abort = false; calledHelp = false;   
95                 allLines = 1;
96                 
97                 //allow user to run help
98                 if(option == "help") { help(); abort = true; calledHelp = true; }
99                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
100                 
101                 else {
102                         vector<string> myArray = setParameters();
103                         
104                         OptionParser parser(option);
105                         map<string,string> parameters = parser.getParameters();
106                         
107                         ValidParameters validParameter;
108                         
109                         //check to make sure all parameters are valid for command
110                         map<string,string>::iterator it;
111                         for (it = parameters.begin(); it != parameters.end(); it++) { 
112                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
113                         }
114                         
115                         //initialize outputTypes
116                         vector<string> tempOutNames;
117                         outputTypes["shared"] = tempOutNames;
118                                     
119                         //if the user changes the input directory command factory will send this info to us in the output parameter 
120                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
121                         if (inputDir == "not found"){   inputDir = "";          }
122                         else {
123                                 string path;
124                 it = parameters.find("shared");
125                                 //user has given a template file
126                                 if(it != parameters.end()){ 
127                                         path = m->hasPath(it->second);
128                                         //if the user has not given a path then, add inputdir. else leave path alone.
129                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
130                                 }
131             }
132                                         
133                         sharedfile = validParameter.validFile(parameters, "shared", true);
134                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
135                         else if (sharedfile == "not found") { 
136                                 //if there is a current shared file, use it
137                                 sharedfile = m->getSharedFile(); 
138                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
139                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
140                         }else { m->setSharedFile(sharedfile); }
141                         
142             //if the user changes the output directory command factory will send this info to us in the output parameter 
143                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);     }
144             
145                         //check for optional parameter and set defaults
146                         // ...at some point should added some additional type checking...
147                         label = validParameter.validFile(parameters, "label", false);                   
148                         if (label == "not found") { label = ""; }
149                         else { 
150                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
151                                 else { allLines = 1;  }
152                         }
153                         
154                         groups = validParameter.validFile(parameters, "groups", false);                 
155                         if (groups == "not found") { groups = ""; pickedGroups = false; }
156                         else { 
157                                 pickedGroups = true;
158                                 m->splitAtDash(groups, Groups);
159                                 m->setGroups(Groups);
160                         }
161                         
162             bool setSomething = false;
163                         string temp = validParameter.validFile(parameters, "minabund", false);          
164             if (temp == "not found"){   temp = "-1";            }
165             else { setSomething = true; }
166                         m->mothurConvert(temp, minAbund);  
167             
168             temp = validParameter.validFile(parameters, "mintotal", false);             
169             if (temp == "not found"){   temp = "-1";            }
170             else { setSomething = true; }
171                         m->mothurConvert(temp, minTotal);
172             
173             temp = validParameter.validFile(parameters, "minnumsamples", false);                
174             if (temp == "not found"){   temp = "-1";            }
175             else { setSomething = true; }
176                         m->mothurConvert(temp, minSamples);
177             
178             temp = validParameter.validFile(parameters, "minpercent", false);           
179             if (temp == "not found"){   temp = "-1";            }
180             else { setSomething = true; }
181             
182                         m->mothurConvert(temp, minPercent);
183             if (minPercent < 1) {} //already in percent form
184             else {  minPercent = minPercent / 100.0; } //user gave us a whole number version so convert to %
185             
186             temp = validParameter.validFile(parameters, "minpercentsamples", false);            
187             if (temp == "not found"){   temp = "-1";            }
188             else { setSomething = true; }
189             
190                         m->mothurConvert(temp, minPercentSamples);
191             if (minPercentSamples < 1) {} //already in percent form
192             else {  minPercentSamples = minPercentSamples / 100.0; } //user gave us a whole number version so convert to %
193                         
194                         temp = validParameter.validFile(parameters, "makerare", false);         if (temp == "not found"){       temp = "T";             }
195                         makeRare = m->isTrue(temp);
196             
197             if (!setSomething) { m->mothurOut("\nYou did not set any parameters. I will filter using minabund=1.\n\n"); minAbund = 1; }
198         }
199         
200         }
201         catch(exception& e) {
202                 m->errorOut(e, "FilterSharedCommand", "FilterSharedCommand");
203                 exit(1);
204         }
205 }
206 //**********************************************************************************************************************
207
208 int FilterSharedCommand::execute(){
209         try {
210         
211                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
212                 
213         InputData input(sharedfile, "sharedfile");
214                 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
215                 string lastLabel = lookup[0]->getLabel();
216                 
217                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
218                 set<string> processedLabels;
219                 set<string> userLabels = labels;
220                 
221                 //as long as you are not at the end of the file or done wih the lines you want
222                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
223                         if (m->control_pressed) {   for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }  
224                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;  }
225                         
226                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
227                                 
228                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
229                                 
230                                 processShared(lookup);
231                                 
232                                 processedLabels.insert(lookup[0]->getLabel());
233                                 userLabels.erase(lookup[0]->getLabel());
234                         }
235                         
236                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
237                                 string saveLabel = lookup[0]->getLabel();
238                                 
239                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
240                                 
241                                 lookup = input.getSharedRAbundVectors(lastLabel);
242                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
243                                 
244                                 processShared(lookup);
245                                 
246                                 processedLabels.insert(lookup[0]->getLabel());
247                                 userLabels.erase(lookup[0]->getLabel());
248                                 
249                                 //restore real lastlabel to save below
250                                 lookup[0]->setLabel(saveLabel);
251                         }
252                         
253                         lastLabel = lookup[0]->getLabel();
254                         //prevent memory leak
255                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
256                         
257                         //get next line to process
258                         lookup = input.getSharedRAbundVectors();                                
259                 }
260                 
261                 
262                 if (m->control_pressed) {   return 0;  }
263                 
264                 //output error messages about any remaining user labels
265                 set<string>::iterator it;
266                 bool needToRun = false;
267                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
268                         m->mothurOut("Your file does not include the label " + *it); 
269                         if (processedLabels.count(lastLabel) != 1) {
270                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
271                                 needToRun = true;
272                         }else {
273                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
274                         }
275                 }
276                 
277                 //run last label if you need to
278                 if (needToRun == true)  {
279                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
280                         lookup = input.getSharedRAbundVectors(lastLabel);
281                         
282                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
283                         
284                         processShared(lookup);
285                         
286                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
287                 }
288         
289                 //set shared file as new current sharedfile
290                 string current = "";
291         itTypes = outputTypes.find("shared");
292                 if (itTypes != outputTypes.end()) {
293                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
294                 }
295                 
296                 m->mothurOutEndLine();
297                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
298                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
299                 m->mothurOutEndLine();
300                 
301                 return 0;
302         }
303         catch(exception& e) {
304                 m->errorOut(e, "FilterSharedCommand", "execute");
305                 exit(1);
306         }
307 }
308 //**********************************************************************************************************************
309 int FilterSharedCommand::processShared(vector<SharedRAbundVector*>& thislookup) {
310         try {
311                 
312                 //save mothurOut's binLabels to restore for next label
313                 vector<string> saveBinLabels = m->currentBinLabels;
314                 
315         map<string, string> variables; 
316         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
317         variables["[extension]"] = m->getExtension(sharedfile);
318         variables["[distance]"] = thislookup[0]->getLabel();
319                 string outputFileName = getOutputFileName("shared", variables);        
320         
321         if (m->control_pressed) {  return 0; }
322         
323         vector<string> filteredLabels;
324         vector<int> rareCounts; rareCounts.resize(m->getGroups().size(), 0);
325         
326         //create new "filtered" lookup
327         vector<SharedRAbundVector*> filteredLookup;
328         for (int i = 0; i < thislookup.size(); i++) {
329             SharedRAbundVector* temp = new SharedRAbundVector();
330                         temp->setLabel(thislookup[i]->getLabel());
331                         temp->setGroup(thislookup[i]->getGroup());
332                         filteredLookup.push_back(temp);
333         }
334         
335         bool filteredSomething = false;
336         int numRemoved = 0;
337         for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
338             
339             if (m->control_pressed) { for (int j = 0; j < filteredLookup.size(); j++) { delete filteredLookup[j]; } return 0; }
340             
341             bool okay = true; //innocent until proven guilty
342             if (minAbund != -1) {
343                 for (int j = 0; j < thislookup.size(); j++) { 
344                     if (thislookup[j]->getAbundance(i) < minAbund) { okay = false; break; }
345                 }
346             }
347             
348             if (okay && (minTotal != -1)) {
349                 int otuTotal = 0;
350                 for (int j = 0; j < thislookup.size(); j++) { 
351                     otuTotal += thislookup[j]->getAbundance(i);
352                 }
353                 if (otuTotal < minTotal) { okay = false; }
354             }
355             
356             if (okay && (minPercent != -0.01)) {
357                 double otuTotal = 0;
358                 double total = 0;
359                 for (int j = 0; j < thislookup.size(); j++) { 
360                     otuTotal += thislookup[j]->getAbundance(i);
361                     total += thislookup[j]->getNumSeqs();
362                 }
363                 double percent = otuTotal / total; 
364                 if (percent < minPercent) { okay = false; }
365             }
366             
367             
368             if (okay && (minSamples != -1)) {
369                 int samples = 0;
370                 for (int j = 0; j < thislookup.size(); j++) { 
371                     if (thislookup[j]->getAbundance(i) != 0) { samples++; }
372                 }
373                 if (samples < minSamples) { okay = false; }
374             }
375             
376             if (okay && (minPercentSamples != -0.01)) {
377                 double samples = 0;
378                 double total = thislookup.size();
379                 for (int j = 0; j < thislookup.size(); j++) { 
380                     if (thislookup[j]->getAbundance(i) != 0) { samples++; }
381                 }
382                 double percent = samples / total; 
383                 if (percent < minPercentSamples) { okay = false; }
384             }
385             
386             //did this OTU pass the filter criteria
387             if (okay) {
388                 filteredLabels.push_back(saveBinLabels[i]);
389                 for (int j = 0; j < filteredLookup.size(); j++) { //add this OTU to the filtered lookup
390                     filteredLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
391                 }
392             }else { //if not, do we want to save the counts
393                 filteredSomething = true;
394                 if (makeRare) {
395                     for (int j = 0; j < rareCounts.size(); j++) {  rareCounts[j] += thislookup[j]->getAbundance(i); }
396                 }
397                 numRemoved++;
398             }
399             
400         }
401         
402         //if we are saving the counts add a "rare" OTU if anything was filtered
403         if (makeRare) {
404             if (filteredSomething) {
405                 for (int j = 0; j < rareCounts.size(); j++) { //add "rare" OTU to the filtered lookup
406                     filteredLookup[j]->push_back(rareCounts[j], thislookup[j]->getGroup());
407                 }
408                 //create label for rare OTUs
409                 filteredLabels.push_back("rareOTUs");
410             }
411         }
412         
413         ofstream out;
414                 m->openOutputFile(outputFileName, out);
415                 outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
416                 
417         m->currentBinLabels = filteredLabels;
418         
419                 filteredLookup[0]->printHeaders(out);
420                 
421                 for (int i = 0; i < filteredLookup.size(); i++) {
422                         out << filteredLookup[i]->getLabel() << '\t' << filteredLookup[i]->getGroup() << '\t';
423                         filteredLookup[i]->print(out);
424                 }
425                 out.close();
426         
427         
428         //save mothurOut's binLabels to restore for next label
429                 m->currentBinLabels = saveBinLabels;
430         
431         for (int j = 0; j < filteredLookup.size(); j++) { delete filteredLookup[j]; }
432                 
433         m->mothurOut("\nRemoved " + toString(numRemoved) + " OTUs.\n");
434         
435                 return 0;
436                 
437         }
438         catch(exception& e) {
439                 m->errorOut(e, "FilterSharedCommand", "processShared");
440                 exit(1);
441         }
442 }                       
443 //**********************************************************************************************************************
444
445