2 // filtersharedcommand.cpp
5 // Created by Sarah Westcott on 1/4/13.
6 // Copyright (c) 2013 Schloss Lab. All rights reserved.
9 #include "filtersharedcommand.h"
11 //**********************************************************************************************************************
12 vector<string> FilterSharedCommand::setParameters(){
14 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","shared",false,true,true); parameters.push_back(pshared);
15 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
16 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
17 CommandParameter pminpercent("minpercent", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminpercent);
18 CommandParameter pminabund("minabund", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminabund);
19 CommandParameter pmintotal("mintotal", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pmintotal);
20 CommandParameter pmakerare("makerare", "Boolean", "", "T", "", "", "","",false,false,true); parameters.push_back(pmakerare);
21 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
22 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
24 vector<string> myArray;
25 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
29 m->errorOut(e, "FilterSharedCommand", "setParameters");
33 //**********************************************************************************************************************
34 string FilterSharedCommand::getHelpString(){
36 string helpString = "";
37 helpString += "The filter.shared command is used to remove OTUs based on various critieria.\n";
38 helpString += "The filter.shared command parameters are shared, minpercent, minabund, mintotal, makerare, groups and label. You must provide a shared file.\n";
39 helpString += "The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n";
40 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
42 helpString += "The minabund parameter allows you indicate the minimum abundance required for each sample in a given OTU. If any samples abundance falls below the minimum, the OTU is removed. Default=0\n";
43 helpString += "The minpercent parameter allows you indicate the minimum relative abundance of an OTU. For example, if the OTUs total abundance across all samples is 8, and the total abundance across all OTUs is 1000, and minpercent=1. The OTU's relative abundance is 0.008, the minimum is 0.01, so the OTU will be removed. Default=0.\n";
44 helpString += "The mintotal parameter allows you indicate the minimum abundance required for a given OTU. If abundance across all samples falls below the minimum, the OTU is removed. Default=0.\n";
46 helpString += "The makerare parameter allows you indicate you want the abundances of any removed OTUs to be saved and a new \"rare\" OTU created with its abundances equal to the sum of the OTUs removed. This will preserve the number of reads in your dataset. Default=T\n";
47 helpString += "The filter.shared command should be in the following format: filter.shared(shared=yourSharedFile, minabund=yourMinAbund, groups=yourGroups, label=yourLabels).\n";
48 helpString += "Example filter.shared(shared=final.an.shared, minabund=10).\n";
49 helpString += "The default value for groups is all the groups in your sharedfile, and all labels in your inputfile will be used.\n";
50 helpString += "The filter.shared command outputs a .filter.shared file.\n";
51 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
55 m->errorOut(e, "FilterSharedCommand", "getHelpString");
59 //**********************************************************************************************************************
60 string FilterSharedCommand::getOutputPattern(string type) {
64 if (type == "shared") { pattern = "[filename],[distance],filter,[extension]"; }
65 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
70 m->errorOut(e, "FilterSharedCommand", "getOutputPattern");
74 //**********************************************************************************************************************
75 FilterSharedCommand::FilterSharedCommand(){
77 abort = true; calledHelp = true;
79 vector<string> tempOutNames;
80 outputTypes["shared"] = tempOutNames;
83 m->errorOut(e, "FilterSharedCommand", "GetRelAbundCommand");
87 //**********************************************************************************************************************
88 FilterSharedCommand::FilterSharedCommand(string option) {
90 abort = false; calledHelp = false;
93 //allow user to run help
94 if(option == "help") { help(); abort = true; calledHelp = true; }
95 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
98 vector<string> myArray = setParameters();
100 OptionParser parser(option);
101 map<string,string> parameters = parser.getParameters();
103 ValidParameters validParameter;
105 //check to make sure all parameters are valid for command
106 map<string,string>::iterator it;
107 for (it = parameters.begin(); it != parameters.end(); it++) {
108 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
111 //initialize outputTypes
112 vector<string> tempOutNames;
113 outputTypes["shared"] = tempOutNames;
115 //if the user changes the input directory command factory will send this info to us in the output parameter
116 string inputDir = validParameter.validFile(parameters, "inputdir", false);
117 if (inputDir == "not found"){ inputDir = ""; }
120 it = parameters.find("shared");
121 //user has given a template file
122 if(it != parameters.end()){
123 path = m->hasPath(it->second);
124 //if the user has not given a path then, add inputdir. else leave path alone.
125 if (path == "") { parameters["shared"] = inputDir + it->second; }
129 sharedfile = validParameter.validFile(parameters, "shared", true);
130 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
131 else if (sharedfile == "not found") {
132 //if there is a current shared file, use it
133 sharedfile = m->getSharedFile();
134 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
135 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
136 }else { m->setSharedFile(sharedfile); }
138 //if the user changes the output directory command factory will send this info to us in the output parameter
139 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); }
141 //check for optional parameter and set defaults
142 // ...at some point should added some additional type checking...
143 label = validParameter.validFile(parameters, "label", false);
144 if (label == "not found") { label = ""; }
146 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
147 else { allLines = 1; }
150 groups = validParameter.validFile(parameters, "groups", false);
151 if (groups == "not found") { groups = ""; pickedGroups = false; }
154 m->splitAtDash(groups, Groups);
155 m->setGroups(Groups);
158 bool setSomething = false;
159 string temp = validParameter.validFile(parameters, "minabund", false);
160 if (temp == "not found"){ temp = "-1"; }
161 else { setSomething = true; }
162 m->mothurConvert(temp, minAbund);
164 temp = validParameter.validFile(parameters, "mintotal", false);
165 if (temp == "not found"){ temp = "-1"; }
166 else { setSomething = true; }
167 m->mothurConvert(temp, minTotal);
169 temp = validParameter.validFile(parameters, "minpercent", false);
170 if (temp == "not found"){ temp = "-1"; }
171 else { setSomething = true; }
173 m->mothurConvert(temp, minPercent);
174 if (minPercent < 1) {} //already in percent form
175 else { minPercent = minPercent / 100.0; } //user gave us a whole number version so convert to %
177 temp = validParameter.validFile(parameters, "makerare", false); if (temp == "not found"){ temp = "T"; }
178 makeRare = m->isTrue(temp);
180 if (!setSomething) { m->mothurOut("\nYou did not set any parameters. I will filter using minabund=1.\n\n"); minAbund = 1; }
184 catch(exception& e) {
185 m->errorOut(e, "FilterSharedCommand", "FilterSharedCommand");
189 //**********************************************************************************************************************
191 int FilterSharedCommand::execute(){
194 if (abort == true) { if (calledHelp) { return 0; } return 2; }
196 InputData input(sharedfile, "sharedfile");
197 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
198 string lastLabel = lookup[0]->getLabel();
200 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
201 set<string> processedLabels;
202 set<string> userLabels = labels;
204 //as long as you are not at the end of the file or done wih the lines you want
205 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
206 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
207 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
209 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
211 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
213 processShared(lookup);
215 processedLabels.insert(lookup[0]->getLabel());
216 userLabels.erase(lookup[0]->getLabel());
219 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
220 string saveLabel = lookup[0]->getLabel();
222 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
224 lookup = input.getSharedRAbundVectors(lastLabel);
225 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
227 processShared(lookup);
229 processedLabels.insert(lookup[0]->getLabel());
230 userLabels.erase(lookup[0]->getLabel());
232 //restore real lastlabel to save below
233 lookup[0]->setLabel(saveLabel);
236 lastLabel = lookup[0]->getLabel();
237 //prevent memory leak
238 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
240 //get next line to process
241 lookup = input.getSharedRAbundVectors();
245 if (m->control_pressed) { return 0; }
247 //output error messages about any remaining user labels
248 set<string>::iterator it;
249 bool needToRun = false;
250 for (it = userLabels.begin(); it != userLabels.end(); it++) {
251 m->mothurOut("Your file does not include the label " + *it);
252 if (processedLabels.count(lastLabel) != 1) {
253 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
256 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
260 //run last label if you need to
261 if (needToRun == true) {
262 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
263 lookup = input.getSharedRAbundVectors(lastLabel);
265 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
267 processShared(lookup);
269 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
272 //set shared file as new current sharedfile
274 itTypes = outputTypes.find("shared");
275 if (itTypes != outputTypes.end()) {
276 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
279 m->mothurOutEndLine();
280 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
281 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
282 m->mothurOutEndLine();
286 catch(exception& e) {
287 m->errorOut(e, "FilterSharedCommand", "execute");
291 //**********************************************************************************************************************
292 int FilterSharedCommand::processShared(vector<SharedRAbundVector*>& thislookup) {
295 //save mothurOut's binLabels to restore for next label
296 vector<string> saveBinLabels = m->currentBinLabels;
298 map<string, string> variables;
299 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
300 variables["[extension]"] = m->getExtension(sharedfile);
301 variables["[distance]"] = thislookup[0]->getLabel();
302 string outputFileName = getOutputFileName("shared", variables);
304 if (m->control_pressed) { return 0; }
306 vector<string> filteredLabels;
307 vector<int> rareCounts; rareCounts.resize(m->getGroups().size(), 0);
309 //create new "filtered" lookup
310 vector<SharedRAbundVector*> filteredLookup;
311 for (int i = 0; i < thislookup.size(); i++) {
312 SharedRAbundVector* temp = new SharedRAbundVector();
313 temp->setLabel(thislookup[i]->getLabel());
314 temp->setGroup(thislookup[i]->getGroup());
315 filteredLookup.push_back(temp);
318 bool filteredSomething = false;
320 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
322 if (m->control_pressed) { for (int j = 0; j < filteredLookup.size(); j++) { delete filteredLookup[j]; } return 0; }
324 bool okay = true; //innocent until proven guilty
325 if (minAbund != -1) {
326 for (int j = 0; j < thislookup.size(); j++) {
327 if (thislookup[j]->getAbundance(i) < minAbund) { okay = false; break; }
331 if (okay && (minTotal != -1)) {
333 for (int j = 0; j < thislookup.size(); j++) {
334 otuTotal += thislookup[j]->getAbundance(i);
336 if (otuTotal < minTotal) { okay = false; }
339 if (okay && (minPercent != -0.01)) {
342 for (int j = 0; j < thislookup.size(); j++) {
343 otuTotal += thislookup[j]->getAbundance(i);
344 total += thislookup[j]->getNumSeqs();
346 double percent = otuTotal / total;
347 if (percent < minPercent) { okay = false; }
350 //did this OTU pass the filter criteria
352 filteredLabels.push_back(saveBinLabels[i]);
353 for (int j = 0; j < filteredLookup.size(); j++) { //add this OTU to the filtered lookup
354 filteredLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
356 }else { //if not, do we want to save the counts
357 filteredSomething = true;
359 for (int j = 0; j < rareCounts.size(); j++) { rareCounts[j] += thislookup[j]->getAbundance(i); }
366 //if we are saving the counts add a "rare" OTU if anything was filtered
368 if (filteredSomething) {
369 for (int j = 0; j < rareCounts.size(); j++) { //add "rare" OTU to the filtered lookup
370 filteredLookup[j]->push_back(rareCounts[j], thislookup[j]->getGroup());
374 string oldLastLabel = saveBinLabels[saveBinLabels.size()-1];
376 string otuNumber = "";
377 for (int i = 0;i < oldLastLabel.length(); i++){
379 if( oldLastLabel[i]>47 && oldLastLabel[i]<58){ otuNumber += oldLastLabel[i]; }
380 else { tag += oldLastLabel[i]; }
384 m->mothurConvert(otuNumber, oldLastBin);
386 string newLabel = tag + toString(oldLastBin);
387 filteredLabels.push_back(newLabel);
392 m->openOutputFile(outputFileName, out);
393 outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
395 m->currentBinLabels = filteredLabels;
397 filteredLookup[0]->printHeaders(out);
399 for (int i = 0; i < filteredLookup.size(); i++) {
400 out << filteredLookup[i]->getLabel() << '\t' << filteredLookup[i]->getGroup() << '\t';
401 filteredLookup[i]->print(out);
406 //save mothurOut's binLabels to restore for next label
407 m->currentBinLabels = saveBinLabels;
409 for (int j = 0; j < filteredLookup.size(); j++) { delete filteredLookup[j]; }
411 m->mothurOut("\nRemoved " + toString(numRemoved) + " OTUs.\n");
416 catch(exception& e) {
417 m->errorOut(e, "FilterSharedCommand", "processShared");
421 //**********************************************************************************************************************