2 // filtersharedcommand.cpp
5 // Created by Sarah Westcott on 1/4/13.
6 // Copyright (c) 2013 Schloss Lab. All rights reserved.
9 #include "filtersharedcommand.h"
11 //**********************************************************************************************************************
12 vector<string> FilterSharedCommand::setParameters(){
14 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","shared",false,true,true); parameters.push_back(pshared);
15 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
16 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
17 CommandParameter pminpercent("minpercent", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminpercent);
18 CommandParameter prarepercent("rarepercent", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(prarepercent);
19 CommandParameter pminabund("minabund", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminabund);
20 CommandParameter pmintotal("mintotal", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pmintotal);
21 CommandParameter pminnumsamples("minnumsamples", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminnumsamples);
22 CommandParameter pminpercentsamples("minpercentsamples", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminpercentsamples);
23 CommandParameter pkeepties("keepties", "Boolean", "", "T", "", "", "","",false,false,true); parameters.push_back(pkeepties);
24 CommandParameter pmakerare("makerare", "Boolean", "", "T", "", "", "","",false,false,true); parameters.push_back(pmakerare);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
28 vector<string> myArray;
29 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
33 m->errorOut(e, "FilterSharedCommand", "setParameters");
37 //**********************************************************************************************************************
38 string FilterSharedCommand::getHelpString(){
40 string helpString = "";
41 helpString += "The filter.shared command is used to remove OTUs based on various critieria.\n";
42 helpString += "The filter.shared command parameters are shared, minpercent, minabund, mintotal, minnumsamples, minpercentsamples, rarepercent, makerare, keepties, groups and label. You must provide a shared file.\n";
43 helpString += "The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n";
44 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
46 helpString += "The minabund parameter allows you indicate the minimum abundance required for each sample in a given OTU. If any samples abundance falls below the minimum, the OTU is removed. Default=0\n";
47 helpString += "The minpercent parameter allows you indicate the minimum relative abundance of an OTU. For example, if the OTUs total abundance across all samples is 8, and the total abundance across all OTUs is 1000, and minpercent=1. The OTU's relative abundance is 0.008, the minimum is 0.01, so the OTU will be removed. Default=0.\n";
48 helpString += "The rarepercent parameter allows you indicate the percentage of otus to remove. The OTUs chosen to be removed are the rarest. For example if you have 1000 OTUs, rarepercent=20 would remove the 200 OTUs with the lowest abundance. Default=0.\n";
49 helpString += "The keepties parameter is used with the rarepercent parameter. It allows you indicate you want to keep the OTUs with the same abundance as the first 'not rare' OTU. For example if you have 10 OTUs, rarepercent=20 abundances of 20, 18, 15, 15, 10, 5, 3, 3, 3, 1. keepties=t, would remove the 10th OTU, but keep the 9th because its abundance ties the 8th OTU. keepties=f would remove OTUs 9 and 10. Default=T\n";
50 helpString += "The minnumsamples parameter allows you indicate the minimum number of samples present in an OTU. If the number of samples present falls below the minimum, the OTU is removed. Default=0.\n";
51 helpString += "The minpercentsamples parameter allows you indicate the minimum percent of sample present in an OTU. For example, if the total number of samples is 10, the number present is 3, and the minpercentsamples=50. The OTU's precent of samples is 0.333, the minimum is 0.50, so the OTU will be removed. Default=0.\n";
52 helpString += "The mintotal parameter allows you indicate the minimum abundance required for a given OTU. If abundance across all samples falls below the minimum, the OTU is removed. Default=0.\n";
54 helpString += "The makerare parameter allows you indicate you want the abundances of any removed OTUs to be saved and a new \"rare\" OTU created with its abundances equal to the sum of the OTUs removed. This will preserve the number of reads in your dataset. Default=T\n";
55 helpString += "The filter.shared command should be in the following format: filter.shared(shared=yourSharedFile, minabund=yourMinAbund, groups=yourGroups, label=yourLabels).\n";
56 helpString += "Example filter.shared(shared=final.an.shared, minabund=10).\n";
57 helpString += "The default value for groups is all the groups in your sharedfile, and all labels in your inputfile will be used.\n";
58 helpString += "The filter.shared command outputs a .filter.shared file.\n";
59 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
63 m->errorOut(e, "FilterSharedCommand", "getHelpString");
67 //**********************************************************************************************************************
68 string FilterSharedCommand::getOutputPattern(string type) {
72 if (type == "shared") { pattern = "[filename],[distance],filter,[extension]"; }
73 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
78 m->errorOut(e, "FilterSharedCommand", "getOutputPattern");
82 //**********************************************************************************************************************
83 FilterSharedCommand::FilterSharedCommand(){
85 abort = true; calledHelp = true;
87 vector<string> tempOutNames;
88 outputTypes["shared"] = tempOutNames;
91 m->errorOut(e, "FilterSharedCommand", "GetRelAbundCommand");
95 //**********************************************************************************************************************
96 FilterSharedCommand::FilterSharedCommand(string option) {
98 abort = false; calledHelp = false;
101 //allow user to run help
102 if(option == "help") { help(); abort = true; calledHelp = true; }
103 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
106 vector<string> myArray = setParameters();
108 OptionParser parser(option);
109 map<string,string> parameters = parser.getParameters();
111 ValidParameters validParameter;
113 //check to make sure all parameters are valid for command
114 map<string,string>::iterator it;
115 for (it = parameters.begin(); it != parameters.end(); it++) {
116 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
119 //initialize outputTypes
120 vector<string> tempOutNames;
121 outputTypes["shared"] = tempOutNames;
123 //if the user changes the input directory command factory will send this info to us in the output parameter
124 string inputDir = validParameter.validFile(parameters, "inputdir", false);
125 if (inputDir == "not found"){ inputDir = ""; }
128 it = parameters.find("shared");
129 //user has given a template file
130 if(it != parameters.end()){
131 path = m->hasPath(it->second);
132 //if the user has not given a path then, add inputdir. else leave path alone.
133 if (path == "") { parameters["shared"] = inputDir + it->second; }
137 sharedfile = validParameter.validFile(parameters, "shared", true);
138 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
139 else if (sharedfile == "not found") {
140 //if there is a current shared file, use it
141 sharedfile = m->getSharedFile();
142 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
143 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
144 }else { m->setSharedFile(sharedfile); }
146 //if the user changes the output directory command factory will send this info to us in the output parameter
147 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); }
149 //check for optional parameter and set defaults
150 // ...at some point should added some additional type checking...
151 label = validParameter.validFile(parameters, "label", false);
152 if (label == "not found") { label = ""; }
154 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
155 else { allLines = 1; }
158 groups = validParameter.validFile(parameters, "groups", false);
159 if (groups == "not found") { groups = ""; pickedGroups = false; }
162 m->splitAtDash(groups, Groups);
163 m->setGroups(Groups);
166 bool setSomething = false;
167 string temp = validParameter.validFile(parameters, "minabund", false);
168 if (temp == "not found"){ temp = "-1"; }
169 else { setSomething = true; }
170 m->mothurConvert(temp, minAbund);
172 temp = validParameter.validFile(parameters, "mintotal", false);
173 if (temp == "not found"){ temp = "-1"; }
174 else { setSomething = true; }
175 m->mothurConvert(temp, minTotal);
177 temp = validParameter.validFile(parameters, "minnumsamples", false);
178 if (temp == "not found"){ temp = "-1"; }
179 else { setSomething = true; }
180 m->mothurConvert(temp, minSamples);
182 temp = validParameter.validFile(parameters, "minpercent", false);
183 if (temp == "not found"){ temp = "-1"; }
184 else { setSomething = true; }
186 m->mothurConvert(temp, minPercent);
187 if (minPercent < 1) {} //already in percent form
188 else { minPercent = minPercent / 100.0; } //user gave us a whole number version so convert to %
190 temp = validParameter.validFile(parameters, "rarepercent", false);
191 if (temp == "not found"){ temp = "-1"; }
192 else { setSomething = true; }
194 m->mothurConvert(temp, rarePercent);
195 if (rarePercent < 1) {} //already in percent form
196 else { rarePercent = rarePercent / 100.0; } //user gave us a whole number version so convert to %
198 temp = validParameter.validFile(parameters, "minpercentsamples", false);
199 if (temp == "not found"){ temp = "-1"; }
200 else { setSomething = true; }
202 m->mothurConvert(temp, minPercentSamples);
203 if (minPercentSamples < 1) {} //already in percent form
204 else { minPercentSamples = minPercentSamples / 100.0; } //user gave us a whole number version so convert to %
206 temp = validParameter.validFile(parameters, "makerare", false); if (temp == "not found"){ temp = "T"; }
207 makeRare = m->isTrue(temp);
209 temp = validParameter.validFile(parameters, "keepties", false); if (temp == "not found"){ temp = "T"; }
210 keepties = m->isTrue(temp);
212 if (!setSomething) { m->mothurOut("\nYou did not set any parameters. I will filter using minabund=1.\n\n"); minAbund = 1; }
216 catch(exception& e) {
217 m->errorOut(e, "FilterSharedCommand", "FilterSharedCommand");
221 //**********************************************************************************************************************
223 int FilterSharedCommand::execute(){
226 if (abort == true) { if (calledHelp) { return 0; } return 2; }
228 InputData input(sharedfile, "sharedfile");
229 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
230 string lastLabel = lookup[0]->getLabel();
232 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
233 set<string> processedLabels;
234 set<string> userLabels = labels;
236 //as long as you are not at the end of the file or done wih the lines you want
237 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
238 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
239 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
241 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
243 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
245 processShared(lookup);
247 processedLabels.insert(lookup[0]->getLabel());
248 userLabels.erase(lookup[0]->getLabel());
251 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
252 string saveLabel = lookup[0]->getLabel();
254 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
256 lookup = input.getSharedRAbundVectors(lastLabel);
257 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
259 processShared(lookup);
261 processedLabels.insert(lookup[0]->getLabel());
262 userLabels.erase(lookup[0]->getLabel());
264 //restore real lastlabel to save below
265 lookup[0]->setLabel(saveLabel);
268 lastLabel = lookup[0]->getLabel();
269 //prevent memory leak
270 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
272 //get next line to process
273 lookup = input.getSharedRAbundVectors();
277 if (m->control_pressed) { return 0; }
279 //output error messages about any remaining user labels
280 set<string>::iterator it;
281 bool needToRun = false;
282 for (it = userLabels.begin(); it != userLabels.end(); it++) {
283 m->mothurOut("Your file does not include the label " + *it);
284 if (processedLabels.count(lastLabel) != 1) {
285 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
288 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
292 //run last label if you need to
293 if (needToRun == true) {
294 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
295 lookup = input.getSharedRAbundVectors(lastLabel);
297 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
299 processShared(lookup);
301 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
304 //set shared file as new current sharedfile
306 itTypes = outputTypes.find("shared");
307 if (itTypes != outputTypes.end()) {
308 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
311 m->mothurOutEndLine();
312 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
313 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
314 m->mothurOutEndLine();
318 catch(exception& e) {
319 m->errorOut(e, "FilterSharedCommand", "execute");
323 //**********************************************************************************************************************
324 int FilterSharedCommand::processShared(vector<SharedRAbundVector*>& thislookup) {
327 //save mothurOut's binLabels to restore for next label
328 vector<string> saveBinLabels = m->currentBinLabels;
330 map<string, string> variables;
331 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
332 variables["[extension]"] = m->getExtension(sharedfile);
333 variables["[distance]"] = thislookup[0]->getLabel();
334 string outputFileName = getOutputFileName("shared", variables);
336 if (m->control_pressed) { return 0; }
338 vector<string> filteredLabels;
339 vector<int> rareCounts; rareCounts.resize(m->getGroups().size(), 0);
341 //create new "filtered" lookup
342 vector<SharedRAbundVector*> filteredLookup;
343 for (int i = 0; i < thislookup.size(); i++) {
344 SharedRAbundVector* temp = new SharedRAbundVector();
345 temp->setLabel(thislookup[i]->getLabel());
346 temp->setGroup(thislookup[i]->getGroup());
347 filteredLookup.push_back(temp);
350 //you want to remove a percentage of OTUs
351 set<string> removeLabels;
352 if (rarePercent != -0.01) {
353 vector<spearmanRank> otus;
354 //rank otus by abundance
355 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
356 float otuTotal = 0.0;
357 for (int j = 0; j < thislookup.size(); j++) {
358 otuTotal += thislookup[j]->getAbundance(i);
360 spearmanRank temp(saveBinLabels[i], otuTotal);
361 otus.push_back(temp);
365 sort(otus.begin(), otus.end(), compareSpearman);
367 //find index of cutoff
368 int indexFirstNotRare = ceil(rarePercent * (float)thislookup[0]->getNumBins());
371 if (keepties) { //adjust indexFirstNotRare if needed
372 if (indexFirstNotRare != 0) { //not out of bounds
373 if (otus[indexFirstNotRare].score == otus[indexFirstNotRare-1].score) { //you have a tie
375 for (int i = indexFirstNotRare-1; i >=0; i--) {
376 if (otus[indexFirstNotRare].score != otus[i].score) { //found value below tie
377 indexFirstNotRare = i+1; tie = false; break;
380 if (tie) { if (m->debug) { m->mothurOut("For distance " + thislookup[0]->getLabel() + " all rare OTUs abundance tie with first 'non rare' OTU, not removing any for rarepercent parameter.\n"); }indexFirstNotRare = 0; }
385 //saved labels for OTUs above rarepercent
386 for (int i = 0; i < indexFirstNotRare; i++) { removeLabels.insert(otus[i].name); }
389 bool filteredSomething = false;
391 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
393 if (m->control_pressed) { for (int j = 0; j < filteredLookup.size(); j++) { delete filteredLookup[j]; } return 0; }
395 bool okay = true; //innocent until proven guilty
396 if (minAbund != -1) {
397 for (int j = 0; j < thislookup.size(); j++) {
398 if (thislookup[j]->getAbundance(i) < minAbund) { okay = false; break; }
402 if (okay && (minTotal != -1)) {
404 for (int j = 0; j < thislookup.size(); j++) {
405 otuTotal += thislookup[j]->getAbundance(i);
407 if (otuTotal < minTotal) { okay = false; }
410 if (okay && (minPercent != -0.01)) {
413 for (int j = 0; j < thislookup.size(); j++) {
414 otuTotal += thislookup[j]->getAbundance(i);
415 total += thislookup[j]->getNumSeqs();
417 double percent = otuTotal / total;
418 if (percent < minPercent) { okay = false; }
422 if (okay && (minSamples != -1)) {
424 for (int j = 0; j < thislookup.size(); j++) {
425 if (thislookup[j]->getAbundance(i) != 0) { samples++; }
427 if (samples < minSamples) { okay = false; }
430 if (okay && (minPercentSamples != -0.01)) {
432 double total = thislookup.size();
433 for (int j = 0; j < thislookup.size(); j++) {
434 if (thislookup[j]->getAbundance(i) != 0) { samples++; }
436 double percent = samples / total;
437 if (percent < minPercentSamples) { okay = false; }
440 if (okay && (rarePercent != -0.01)) {
441 if (removeLabels.count(saveBinLabels[i]) != 0) { //are we on the 'bad' list
446 //did this OTU pass the filter criteria
448 filteredLabels.push_back(saveBinLabels[i]);
449 for (int j = 0; j < filteredLookup.size(); j++) { //add this OTU to the filtered lookup
450 filteredLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
452 }else { //if not, do we want to save the counts
453 filteredSomething = true;
455 for (int j = 0; j < rareCounts.size(); j++) { rareCounts[j] += thislookup[j]->getAbundance(i); }
462 //if we are saving the counts add a "rare" OTU if anything was filtered
464 if (filteredSomething) {
465 for (int j = 0; j < rareCounts.size(); j++) { //add "rare" OTU to the filtered lookup
466 filteredLookup[j]->push_back(rareCounts[j], thislookup[j]->getGroup());
468 //create label for rare OTUs
469 filteredLabels.push_back("rareOTUs");
474 m->openOutputFile(outputFileName, out);
475 outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
477 m->currentBinLabels = filteredLabels;
479 filteredLookup[0]->printHeaders(out);
481 for (int i = 0; i < filteredLookup.size(); i++) {
482 out << filteredLookup[i]->getLabel() << '\t' << filteredLookup[i]->getGroup() << '\t';
483 filteredLookup[i]->print(out);
488 //save mothurOut's binLabels to restore for next label
489 m->currentBinLabels = saveBinLabels;
491 for (int j = 0; j < filteredLookup.size(); j++) { delete filteredLookup[j]; }
493 m->mothurOut("\nRemoved " + toString(numRemoved) + " OTUs.\n");
498 catch(exception& e) {
499 m->errorOut(e, "FilterSharedCommand", "processShared");
503 //**********************************************************************************************************************