2 // filtersharedcommand.cpp
5 // Created by Sarah Westcott on 1/4/13.
6 // Copyright (c) 2013 Schloss Lab. All rights reserved.
9 #include "filtersharedcommand.h"
11 //**********************************************************************************************************************
12 vector<string> FilterSharedCommand::setParameters(){
14 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","shared",false,true,true); parameters.push_back(pshared);
15 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
16 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
17 CommandParameter pminpercent("minpercent", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminpercent);
18 CommandParameter pminabund("minabund", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminabund);
19 CommandParameter pmintotal("mintotal", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pmintotal);
20 CommandParameter pminnumsamples("minnumsamples", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminnumsamples);
21 CommandParameter pminpercentsamples("minpercentsamples", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminpercentsamples);
22 CommandParameter pmakerare("makerare", "Boolean", "", "T", "", "", "","",false,false,true); parameters.push_back(pmakerare);
23 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
24 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
26 vector<string> myArray;
27 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
31 m->errorOut(e, "FilterSharedCommand", "setParameters");
35 //**********************************************************************************************************************
36 string FilterSharedCommand::getHelpString(){
38 string helpString = "";
39 helpString += "The filter.shared command is used to remove OTUs based on various critieria.\n";
40 helpString += "The filter.shared command parameters are shared, minpercent, minabund, mintotal, minnumsamples, minpercentsamples, makerare, groups and label. You must provide a shared file.\n";
41 helpString += "The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n";
42 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
44 helpString += "The minabund parameter allows you indicate the minimum abundance required for each sample in a given OTU. If any samples abundance falls below the minimum, the OTU is removed. Default=0\n";
45 helpString += "The minpercent parameter allows you indicate the minimum relative abundance of an OTU. For example, if the OTUs total abundance across all samples is 8, and the total abundance across all OTUs is 1000, and minpercent=1. The OTU's relative abundance is 0.008, the minimum is 0.01, so the OTU will be removed. Default=0.\n";
46 helpString += "The minnumsamples parameter allows you indicate the minimum number of samples present in an OTU. If the number of samples present falls below the minimum, the OTU is removed. Default=0.\n";
47 helpString += "The minpercentsamples parameter allows you indicate the minimum percent of sample present in an OTU. For example, if the total number of samples is 10, the number present is 3, and the minpercentsamples=50. The OTU's precent of samples is 0.333, the minimum is 0.50, so the OTU will be removed. Default=0.\n";
48 helpString += "The mintotal parameter allows you indicate the minimum abundance required for a given OTU. If abundance across all samples falls below the minimum, the OTU is removed. Default=0.\n";
50 helpString += "The makerare parameter allows you indicate you want the abundances of any removed OTUs to be saved and a new \"rare\" OTU created with its abundances equal to the sum of the OTUs removed. This will preserve the number of reads in your dataset. Default=T\n";
51 helpString += "The filter.shared command should be in the following format: filter.shared(shared=yourSharedFile, minabund=yourMinAbund, groups=yourGroups, label=yourLabels).\n";
52 helpString += "Example filter.shared(shared=final.an.shared, minabund=10).\n";
53 helpString += "The default value for groups is all the groups in your sharedfile, and all labels in your inputfile will be used.\n";
54 helpString += "The filter.shared command outputs a .filter.shared file.\n";
55 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
59 m->errorOut(e, "FilterSharedCommand", "getHelpString");
63 //**********************************************************************************************************************
64 string FilterSharedCommand::getOutputPattern(string type) {
68 if (type == "shared") { pattern = "[filename],[distance],filter,[extension]"; }
69 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
74 m->errorOut(e, "FilterSharedCommand", "getOutputPattern");
78 //**********************************************************************************************************************
79 FilterSharedCommand::FilterSharedCommand(){
81 abort = true; calledHelp = true;
83 vector<string> tempOutNames;
84 outputTypes["shared"] = tempOutNames;
87 m->errorOut(e, "FilterSharedCommand", "GetRelAbundCommand");
91 //**********************************************************************************************************************
92 FilterSharedCommand::FilterSharedCommand(string option) {
94 abort = false; calledHelp = false;
97 //allow user to run help
98 if(option == "help") { help(); abort = true; calledHelp = true; }
99 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
102 vector<string> myArray = setParameters();
104 OptionParser parser(option);
105 map<string,string> parameters = parser.getParameters();
107 ValidParameters validParameter;
109 //check to make sure all parameters are valid for command
110 map<string,string>::iterator it;
111 for (it = parameters.begin(); it != parameters.end(); it++) {
112 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
115 //initialize outputTypes
116 vector<string> tempOutNames;
117 outputTypes["shared"] = tempOutNames;
119 //if the user changes the input directory command factory will send this info to us in the output parameter
120 string inputDir = validParameter.validFile(parameters, "inputdir", false);
121 if (inputDir == "not found"){ inputDir = ""; }
124 it = parameters.find("shared");
125 //user has given a template file
126 if(it != parameters.end()){
127 path = m->hasPath(it->second);
128 //if the user has not given a path then, add inputdir. else leave path alone.
129 if (path == "") { parameters["shared"] = inputDir + it->second; }
133 sharedfile = validParameter.validFile(parameters, "shared", true);
134 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
135 else if (sharedfile == "not found") {
136 //if there is a current shared file, use it
137 sharedfile = m->getSharedFile();
138 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
139 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
140 }else { m->setSharedFile(sharedfile); }
142 //if the user changes the output directory command factory will send this info to us in the output parameter
143 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); }
145 //check for optional parameter and set defaults
146 // ...at some point should added some additional type checking...
147 label = validParameter.validFile(parameters, "label", false);
148 if (label == "not found") { label = ""; }
150 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
151 else { allLines = 1; }
154 groups = validParameter.validFile(parameters, "groups", false);
155 if (groups == "not found") { groups = ""; pickedGroups = false; }
158 m->splitAtDash(groups, Groups);
159 m->setGroups(Groups);
162 bool setSomething = false;
163 string temp = validParameter.validFile(parameters, "minabund", false);
164 if (temp == "not found"){ temp = "-1"; }
165 else { setSomething = true; }
166 m->mothurConvert(temp, minAbund);
168 temp = validParameter.validFile(parameters, "mintotal", false);
169 if (temp == "not found"){ temp = "-1"; }
170 else { setSomething = true; }
171 m->mothurConvert(temp, minTotal);
173 temp = validParameter.validFile(parameters, "minnumsamples", false);
174 if (temp == "not found"){ temp = "-1"; }
175 else { setSomething = true; }
176 m->mothurConvert(temp, minSamples);
178 temp = validParameter.validFile(parameters, "minpercent", false);
179 if (temp == "not found"){ temp = "-1"; }
180 else { setSomething = true; }
182 m->mothurConvert(temp, minPercent);
183 if (minPercent < 1) {} //already in percent form
184 else { minPercent = minPercent / 100.0; } //user gave us a whole number version so convert to %
186 temp = validParameter.validFile(parameters, "minpercentsamples", false);
187 if (temp == "not found"){ temp = "-1"; }
188 else { setSomething = true; }
190 m->mothurConvert(temp, minPercentSamples);
191 if (minPercentSamples < 1) {} //already in percent form
192 else { minPercentSamples = minPercentSamples / 100.0; } //user gave us a whole number version so convert to %
194 temp = validParameter.validFile(parameters, "makerare", false); if (temp == "not found"){ temp = "T"; }
195 makeRare = m->isTrue(temp);
197 if (!setSomething) { m->mothurOut("\nYou did not set any parameters. I will filter using minabund=1.\n\n"); minAbund = 1; }
201 catch(exception& e) {
202 m->errorOut(e, "FilterSharedCommand", "FilterSharedCommand");
206 //**********************************************************************************************************************
208 int FilterSharedCommand::execute(){
211 if (abort == true) { if (calledHelp) { return 0; } return 2; }
213 InputData input(sharedfile, "sharedfile");
214 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
215 string lastLabel = lookup[0]->getLabel();
217 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
218 set<string> processedLabels;
219 set<string> userLabels = labels;
221 //as long as you are not at the end of the file or done wih the lines you want
222 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
223 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
224 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
226 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
228 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
230 processShared(lookup);
232 processedLabels.insert(lookup[0]->getLabel());
233 userLabels.erase(lookup[0]->getLabel());
236 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
237 string saveLabel = lookup[0]->getLabel();
239 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
241 lookup = input.getSharedRAbundVectors(lastLabel);
242 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
244 processShared(lookup);
246 processedLabels.insert(lookup[0]->getLabel());
247 userLabels.erase(lookup[0]->getLabel());
249 //restore real lastlabel to save below
250 lookup[0]->setLabel(saveLabel);
253 lastLabel = lookup[0]->getLabel();
254 //prevent memory leak
255 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
257 //get next line to process
258 lookup = input.getSharedRAbundVectors();
262 if (m->control_pressed) { return 0; }
264 //output error messages about any remaining user labels
265 set<string>::iterator it;
266 bool needToRun = false;
267 for (it = userLabels.begin(); it != userLabels.end(); it++) {
268 m->mothurOut("Your file does not include the label " + *it);
269 if (processedLabels.count(lastLabel) != 1) {
270 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
273 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
277 //run last label if you need to
278 if (needToRun == true) {
279 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
280 lookup = input.getSharedRAbundVectors(lastLabel);
282 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
284 processShared(lookup);
286 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
289 //set shared file as new current sharedfile
291 itTypes = outputTypes.find("shared");
292 if (itTypes != outputTypes.end()) {
293 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
296 m->mothurOutEndLine();
297 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
298 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
299 m->mothurOutEndLine();
303 catch(exception& e) {
304 m->errorOut(e, "FilterSharedCommand", "execute");
308 //**********************************************************************************************************************
309 int FilterSharedCommand::processShared(vector<SharedRAbundVector*>& thislookup) {
312 //save mothurOut's binLabels to restore for next label
313 vector<string> saveBinLabels = m->currentBinLabels;
315 map<string, string> variables;
316 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
317 variables["[extension]"] = m->getExtension(sharedfile);
318 variables["[distance]"] = thislookup[0]->getLabel();
319 string outputFileName = getOutputFileName("shared", variables);
321 if (m->control_pressed) { return 0; }
323 vector<string> filteredLabels;
324 vector<int> rareCounts; rareCounts.resize(m->getGroups().size(), 0);
326 //create new "filtered" lookup
327 vector<SharedRAbundVector*> filteredLookup;
328 for (int i = 0; i < thislookup.size(); i++) {
329 SharedRAbundVector* temp = new SharedRAbundVector();
330 temp->setLabel(thislookup[i]->getLabel());
331 temp->setGroup(thislookup[i]->getGroup());
332 filteredLookup.push_back(temp);
335 bool filteredSomething = false;
337 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
339 if (m->control_pressed) { for (int j = 0; j < filteredLookup.size(); j++) { delete filteredLookup[j]; } return 0; }
341 bool okay = true; //innocent until proven guilty
342 if (minAbund != -1) {
343 for (int j = 0; j < thislookup.size(); j++) {
344 if (thislookup[j]->getAbundance(i) < minAbund) { okay = false; break; }
348 if (okay && (minTotal != -1)) {
350 for (int j = 0; j < thislookup.size(); j++) {
351 otuTotal += thislookup[j]->getAbundance(i);
353 if (otuTotal < minTotal) { okay = false; }
356 if (okay && (minPercent != -0.01)) {
359 for (int j = 0; j < thislookup.size(); j++) {
360 otuTotal += thislookup[j]->getAbundance(i);
361 total += thislookup[j]->getNumSeqs();
363 double percent = otuTotal / total;
364 if (percent < minPercent) { okay = false; }
368 if (okay && (minSamples != -1)) {
370 for (int j = 0; j < thislookup.size(); j++) {
371 if (thislookup[j]->getAbundance(i) != 0) { samples++; }
373 if (samples < minSamples) { okay = false; }
376 if (okay && (minPercentSamples != -0.01)) {
378 double total = thislookup.size();
379 for (int j = 0; j < thislookup.size(); j++) {
380 if (thislookup[j]->getAbundance(i) != 0) { samples++; }
382 double percent = samples / total;
383 if (percent < minPercentSamples) { okay = false; }
386 //did this OTU pass the filter criteria
388 filteredLabels.push_back(saveBinLabels[i]);
389 for (int j = 0; j < filteredLookup.size(); j++) { //add this OTU to the filtered lookup
390 filteredLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
392 }else { //if not, do we want to save the counts
393 filteredSomething = true;
395 for (int j = 0; j < rareCounts.size(); j++) { rareCounts[j] += thislookup[j]->getAbundance(i); }
402 //if we are saving the counts add a "rare" OTU if anything was filtered
404 if (filteredSomething) {
405 for (int j = 0; j < rareCounts.size(); j++) { //add "rare" OTU to the filtered lookup
406 filteredLookup[j]->push_back(rareCounts[j], thislookup[j]->getGroup());
408 //create label for rare OTUs
409 filteredLabels.push_back("rareOTUs");
414 m->openOutputFile(outputFileName, out);
415 outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
417 m->currentBinLabels = filteredLabels;
419 filteredLookup[0]->printHeaders(out);
421 for (int i = 0; i < filteredLookup.size(); i++) {
422 out << filteredLookup[i]->getLabel() << '\t' << filteredLookup[i]->getGroup() << '\t';
423 filteredLookup[i]->print(out);
428 //save mothurOut's binLabels to restore for next label
429 m->currentBinLabels = saveBinLabels;
431 for (int j = 0; j < filteredLookup.size(); j++) { delete filteredLookup[j]; }
433 m->mothurOut("\nRemoved " + toString(numRemoved) + " OTUs.\n");
438 catch(exception& e) {
439 m->errorOut(e, "FilterSharedCommand", "processShared");
443 //**********************************************************************************************************************