]> git.donarmstrong.com Git - mothur.git/blob - filtersharedcommand.cpp
added filter.shared command. fixed lci and uci for thetayc calc
[mothur.git] / filtersharedcommand.cpp
1 //
2 //  filtersharedcommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 1/4/13.
6 //  Copyright (c) 2013 Schloss Lab. All rights reserved.
7 //
8
9 #include "filtersharedcommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> FilterSharedCommand::setParameters(){    
13         try {           
14                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","shared",false,true,true); parameters.push_back(pshared);
15                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
16                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
17                 CommandParameter pminpercent("minpercent", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminpercent);
18         CommandParameter pminabund("minabund", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminabund);
19         CommandParameter pmintotal("mintotal", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pmintotal);
20                 CommandParameter pmakerare("makerare", "Boolean", "", "T", "", "", "","",false,false,true); parameters.push_back(pmakerare);
21                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
23                 
24                 vector<string> myArray;
25                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "FilterSharedCommand", "setParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 string FilterSharedCommand::getHelpString(){    
35         try {
36                 string helpString = "";
37                 helpString += "The filter.shared command is used to remove OTUs based on various critieria.\n";
38                 helpString += "The filter.shared command parameters are shared, minpercent, minabund, mintotal, makerare, groups and label.  You must provide a shared file.\n";
39                 helpString += "The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n";
40                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
41         
42                 helpString += "The minabund parameter allows you indicate the minimum abundance required for each sample in a given OTU.  If any samples abundance falls below the minimum, the OTU is removed. Default=0\n";
43         helpString += "The minpercent parameter allows you indicate the minimum relative abundance of an OTU. For example, if the OTUs total abundance across all samples is 8, and the total abundance across all OTUs is 1000, and minpercent=1. The OTU's relative abundance is 0.008, the minimum is 0.01, so the OTU will be removed. Default=0.\n";
44         helpString += "The mintotal parameter allows you indicate the minimum abundance required for a given OTU.  If abundance across all samples falls below the minimum, the OTU is removed. Default=0.\n";
45         
46                 helpString += "The makerare parameter allows you indicate you want the abundances of any removed OTUs to be saved and a new \"rare\" OTU created with its abundances equal to the sum of the OTUs removed.  This will preserve the number of reads in your dataset. Default=T\n";
47                 helpString += "The filter.shared command should be in the following format: filter.shared(shared=yourSharedFile, minabund=yourMinAbund, groups=yourGroups, label=yourLabels).\n";
48                 helpString += "Example filter.shared(shared=final.an.shared, minabund=10).\n";
49                 helpString += "The default value for groups is all the groups in your sharedfile, and all labels in your inputfile will be used.\n";
50                 helpString += "The filter.shared command outputs a .filter.shared file.\n";
51                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
52                 return helpString;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "FilterSharedCommand", "getHelpString");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 string FilterSharedCommand::getOutputPattern(string type) {
61     try {
62         string pattern = "";
63         
64         if (type == "shared")      {   pattern = "[filename],[distance],filter,[extension]";    }
65         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
66         
67         return pattern;
68     }
69     catch(exception& e) {
70         m->errorOut(e, "FilterSharedCommand", "getOutputPattern");
71         exit(1);
72     }
73 }
74 //**********************************************************************************************************************
75 FilterSharedCommand::FilterSharedCommand(){     
76         try {
77                 abort = true; calledHelp = true; 
78                 setParameters();
79                 vector<string> tempOutNames;
80                 outputTypes["shared"] = tempOutNames;
81         }
82         catch(exception& e) {
83                 m->errorOut(e, "FilterSharedCommand", "GetRelAbundCommand");
84                 exit(1);
85         }
86 }
87 //**********************************************************************************************************************
88 FilterSharedCommand::FilterSharedCommand(string option) {
89         try {
90                 abort = false; calledHelp = false;   
91                 allLines = 1;
92                 
93                 //allow user to run help
94                 if(option == "help") { help(); abort = true; calledHelp = true; }
95                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
96                 
97                 else {
98                         vector<string> myArray = setParameters();
99                         
100                         OptionParser parser(option);
101                         map<string,string> parameters = parser.getParameters();
102                         
103                         ValidParameters validParameter;
104                         
105                         //check to make sure all parameters are valid for command
106                         map<string,string>::iterator it;
107                         for (it = parameters.begin(); it != parameters.end(); it++) { 
108                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
109                         }
110                         
111                         //initialize outputTypes
112                         vector<string> tempOutNames;
113                         outputTypes["shared"] = tempOutNames;
114                                     
115                         //if the user changes the input directory command factory will send this info to us in the output parameter 
116                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
117                         if (inputDir == "not found"){   inputDir = "";          }
118                         else {
119                                 string path;
120                 it = parameters.find("shared");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
126                                 }
127             }
128                                         
129                         sharedfile = validParameter.validFile(parameters, "shared", true);
130                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
131                         else if (sharedfile == "not found") { 
132                                 //if there is a current shared file, use it
133                                 sharedfile = m->getSharedFile(); 
134                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
135                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
136                         }else { m->setSharedFile(sharedfile); }
137                         
138             //if the user changes the output directory command factory will send this info to us in the output parameter 
139                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);     }
140             
141                         //check for optional parameter and set defaults
142                         // ...at some point should added some additional type checking...
143                         label = validParameter.validFile(parameters, "label", false);                   
144                         if (label == "not found") { label = ""; }
145                         else { 
146                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
147                                 else { allLines = 1;  }
148                         }
149                         
150                         groups = validParameter.validFile(parameters, "groups", false);                 
151                         if (groups == "not found") { groups = ""; pickedGroups = false; }
152                         else { 
153                                 pickedGroups = true;
154                                 m->splitAtDash(groups, Groups);
155                                 m->setGroups(Groups);
156                         }
157                         
158             bool setSomething = false;
159                         string temp = validParameter.validFile(parameters, "minabund", false);          
160             if (temp == "not found"){   temp = "-1";            }
161             else { setSomething = true; }
162                         m->mothurConvert(temp, minAbund);  
163             
164             temp = validParameter.validFile(parameters, "mintotal", false);             
165             if (temp == "not found"){   temp = "-1";            }
166             else { setSomething = true; }
167                         m->mothurConvert(temp, minTotal);
168             
169             temp = validParameter.validFile(parameters, "minpercent", false);           
170             if (temp == "not found"){   temp = "-1";            }
171             else { setSomething = true; }
172             
173                         m->mothurConvert(temp, minPercent);
174             if (minPercent < 1) {} //already in percent form
175             else {  minPercent = minPercent / 100.0; } //user gave us a whole number version so convert to %
176                         
177                         temp = validParameter.validFile(parameters, "makerare", false);         if (temp == "not found"){       temp = "T";             }
178                         makeRare = m->isTrue(temp);
179             
180             if (!setSomething) { m->mothurOut("\nYou did not set any parameters. I will filter using minabund=1.\n\n"); minAbund = 1; }
181         }
182         
183         }
184         catch(exception& e) {
185                 m->errorOut(e, "FilterSharedCommand", "FilterSharedCommand");
186                 exit(1);
187         }
188 }
189 //**********************************************************************************************************************
190
191 int FilterSharedCommand::execute(){
192         try {
193         
194                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
195                 
196         InputData input(sharedfile, "sharedfile");
197                 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
198                 string lastLabel = lookup[0]->getLabel();
199                 
200                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
201                 set<string> processedLabels;
202                 set<string> userLabels = labels;
203                 
204                 //as long as you are not at the end of the file or done wih the lines you want
205                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
206                         if (m->control_pressed) {   for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }  
207                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;  }
208                         
209                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
210                                 
211                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
212                                 
213                                 processShared(lookup);
214                                 
215                                 processedLabels.insert(lookup[0]->getLabel());
216                                 userLabels.erase(lookup[0]->getLabel());
217                         }
218                         
219                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
220                                 string saveLabel = lookup[0]->getLabel();
221                                 
222                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
223                                 
224                                 lookup = input.getSharedRAbundVectors(lastLabel);
225                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
226                                 
227                                 processShared(lookup);
228                                 
229                                 processedLabels.insert(lookup[0]->getLabel());
230                                 userLabels.erase(lookup[0]->getLabel());
231                                 
232                                 //restore real lastlabel to save below
233                                 lookup[0]->setLabel(saveLabel);
234                         }
235                         
236                         lastLabel = lookup[0]->getLabel();
237                         //prevent memory leak
238                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
239                         
240                         //get next line to process
241                         lookup = input.getSharedRAbundVectors();                                
242                 }
243                 
244                 
245                 if (m->control_pressed) {   return 0;  }
246                 
247                 //output error messages about any remaining user labels
248                 set<string>::iterator it;
249                 bool needToRun = false;
250                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
251                         m->mothurOut("Your file does not include the label " + *it); 
252                         if (processedLabels.count(lastLabel) != 1) {
253                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
254                                 needToRun = true;
255                         }else {
256                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
257                         }
258                 }
259                 
260                 //run last label if you need to
261                 if (needToRun == true)  {
262                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
263                         lookup = input.getSharedRAbundVectors(lastLabel);
264                         
265                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
266                         
267                         processShared(lookup);
268                         
269                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
270                 }
271         
272                 //set shared file as new current sharedfile
273                 string current = "";
274         itTypes = outputTypes.find("shared");
275                 if (itTypes != outputTypes.end()) {
276                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
277                 }
278                 
279                 m->mothurOutEndLine();
280                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
281                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
282                 m->mothurOutEndLine();
283                 
284                 return 0;
285         }
286         catch(exception& e) {
287                 m->errorOut(e, "FilterSharedCommand", "execute");
288                 exit(1);
289         }
290 }
291 //**********************************************************************************************************************
292 int FilterSharedCommand::processShared(vector<SharedRAbundVector*>& thislookup) {
293         try {
294                 
295                 //save mothurOut's binLabels to restore for next label
296                 vector<string> saveBinLabels = m->currentBinLabels;
297                 
298         map<string, string> variables; 
299         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
300         variables["[extension]"] = m->getExtension(sharedfile);
301         variables["[distance]"] = thislookup[0]->getLabel();
302                 string outputFileName = getOutputFileName("shared", variables);        
303         
304         if (m->control_pressed) {  return 0; }
305         
306         vector<string> filteredLabels;
307         vector<int> rareCounts; rareCounts.resize(m->getGroups().size(), 0);
308         
309         //create new "filtered" lookup
310         vector<SharedRAbundVector*> filteredLookup;
311         for (int i = 0; i < thislookup.size(); i++) {
312             SharedRAbundVector* temp = new SharedRAbundVector();
313                         temp->setLabel(thislookup[i]->getLabel());
314                         temp->setGroup(thislookup[i]->getGroup());
315                         filteredLookup.push_back(temp);
316         }
317         
318         bool filteredSomething = false;
319         int numRemoved = 0;
320         for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
321             
322             if (m->control_pressed) { for (int j = 0; j < filteredLookup.size(); j++) { delete filteredLookup[j]; } return 0; }
323             
324             bool okay = true; //innocent until proven guilty
325             if (minAbund != -1) {
326                 for (int j = 0; j < thislookup.size(); j++) { 
327                     if (thislookup[j]->getAbundance(i) < minAbund) { okay = false; break; }
328                 }
329             }
330             
331             if (okay && (minTotal != -1)) {
332                 int otuTotal = 0;
333                 for (int j = 0; j < thislookup.size(); j++) { 
334                     otuTotal += thislookup[j]->getAbundance(i);
335                 }
336                 if (otuTotal < minTotal) { okay = false; }
337             }
338             
339             if (okay && (minPercent != -0.01)) {
340                 double otuTotal = 0;
341                 double total = 0;
342                 for (int j = 0; j < thislookup.size(); j++) { 
343                     otuTotal += thislookup[j]->getAbundance(i);
344                     total += thislookup[j]->getNumSeqs();
345                 }
346                 double percent = otuTotal / total; 
347                 if (percent < minPercent) { okay = false; }
348             }
349             
350             //did this OTU pass the filter criteria
351             if (okay) {
352                 filteredLabels.push_back(saveBinLabels[i]);
353                 for (int j = 0; j < filteredLookup.size(); j++) { //add this OTU to the filtered lookup
354                     filteredLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
355                 }
356             }else { //if not, do we want to save the counts
357                 filteredSomething = true;
358                 if (makeRare) {
359                     for (int j = 0; j < rareCounts.size(); j++) {  rareCounts[j] += thislookup[j]->getAbundance(i); }
360                 }
361                 numRemoved++;
362             }
363             
364         }
365         
366         //if we are saving the counts add a "rare" OTU if anything was filtered
367         if (makeRare) {
368             if (filteredSomething) {
369                 for (int j = 0; j < rareCounts.size(); j++) { //add "rare" OTU to the filtered lookup
370                     filteredLookup[j]->push_back(rareCounts[j], thislookup[j]->getGroup());
371                 }
372                 
373                 //create new label
374                 string oldLastLabel = saveBinLabels[saveBinLabels.size()-1];
375                 string tag = "";
376                 string otuNumber = "";
377                 for (int i = 0;i < oldLastLabel.length(); i++){
378                     //add numbers
379                     if( oldLastLabel[i]>47 && oldLastLabel[i]<58){ otuNumber += oldLastLabel[i];  }
380                     else { tag += oldLastLabel[i]; }
381                 }
382                 
383                 int oldLastBin;
384                 m->mothurConvert(otuNumber, oldLastBin);
385                 oldLastBin++;
386                 string newLabel = tag + toString(oldLastBin);
387                 filteredLabels.push_back(newLabel);
388             }
389         }
390         
391         ofstream out;
392                 m->openOutputFile(outputFileName, out);
393                 outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
394                 
395         m->currentBinLabels = filteredLabels;
396         
397                 filteredLookup[0]->printHeaders(out);
398                 
399                 for (int i = 0; i < filteredLookup.size(); i++) {
400                         out << filteredLookup[i]->getLabel() << '\t' << filteredLookup[i]->getGroup() << '\t';
401                         filteredLookup[i]->print(out);
402                 }
403                 out.close();
404         
405         
406         //save mothurOut's binLabels to restore for next label
407                 m->currentBinLabels = saveBinLabels;
408         
409         for (int j = 0; j < filteredLookup.size(); j++) { delete filteredLookup[j]; }
410                 
411         m->mothurOut("\nRemoved " + toString(numRemoved) + " OTUs.\n");
412         
413                 return 0;
414                 
415         }
416         catch(exception& e) {
417                 m->errorOut(e, "FilterSharedCommand", "processShared");
418                 exit(1);
419         }
420 }                       
421 //**********************************************************************************************************************
422
423