]> git.donarmstrong.com Git - mothur.git/blob - removeotuscommand.cpp
mods in testing 1.16.0
[mothur.git] / removeotuscommand.cpp
1 /*
2  *  removeotuscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "removeotuscommand.h"
11 #include "inputdata.h"
12 #include "sharedutilities.h"
13
14
15 //**********************************************************************************************************************
16 vector<string> RemoveOtusCommand::getValidParameters(){ 
17         try {
18                 string Array[] =  { "group", "accnos","label", "groups","list","outputdir","inputdir" };
19                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
20                 return myArray;
21         }
22         catch(exception& e) {
23                 m->errorOut(e, "RemoveOtusCommand", "getValidParameters");
24                 exit(1);
25         }
26 }
27 //**********************************************************************************************************************
28 RemoveOtusCommand::RemoveOtusCommand(){ 
29         try {
30                 abort = true;
31                 //initialize outputTypes
32                 vector<string> tempOutNames;
33                 outputTypes["group"] = tempOutNames;
34                 outputTypes["list"] = tempOutNames;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "RemoveOtusCommand", "RemoveOtusCommand");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 vector<string> RemoveOtusCommand::getRequiredParameters(){      
43         try {
44                 string Array[] =  {"group","label", "list"};
45                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
46                 return myArray;
47         }
48         catch(exception& e) {
49                 m->errorOut(e, "RemoveOtusCommand", "getRequiredParameters");
50                 exit(1);
51         }
52 }
53 //**********************************************************************************************************************
54 vector<string> RemoveOtusCommand::getRequiredFiles(){   
55         try {
56                 vector<string> myArray;
57                 return myArray;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "RemoveOtusCommand", "getRequiredFiles");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 RemoveOtusCommand::RemoveOtusCommand(string option)  {
66         try {
67                 abort = false;
68                 
69                 //allow user to run help
70                 if(option == "help") { help(); abort = true; }
71                 
72                 else {
73                         //valid paramters for this command
74                         string Array[] =  { "group", "accnos","label", "groups", "list","outputdir","inputdir" };
75                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
76                         
77                         OptionParser parser(option);
78                         map<string,string> parameters = parser.getParameters();
79                         
80                         ValidParameters validParameter;
81                         map<string,string>::iterator it;
82                         
83                         //check to make sure all parameters are valid for command
84                         for (it = parameters.begin(); it != parameters.end(); it++) { 
85                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
86                         }
87                         
88                         //initialize outputTypes
89                         vector<string> tempOutNames;
90                         outputTypes["group"] = tempOutNames;
91                         outputTypes["list"] = tempOutNames;
92                         
93                         
94                         //if the user changes the output directory command factory will send this info to us in the output parameter 
95                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
96                         
97                         //if the user changes the input directory command factory will send this info to us in the output parameter 
98                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
99                         if (inputDir == "not found"){   inputDir = "";          }
100                         else {
101                                 string path;
102                                 it = parameters.find("accnos");
103                                 //user has given a template file
104                                 if(it != parameters.end()){ 
105                                         path = m->hasPath(it->second);
106                                         //if the user has not given a path then, add inputdir. else leave path alone.
107                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
108                                 }
109                                 
110                                 it = parameters.find("list");
111                                 //user has given a template file
112                                 if(it != parameters.end()){ 
113                                         path = m->hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
116                                 }
117                                 
118                                 it = parameters.find("group");
119                                 //user has given a template file
120                                 if(it != parameters.end()){ 
121                                         path = m->hasPath(it->second);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
124                                 }
125                         }
126                         
127                         
128                         //check for required parameters
129                         accnosfile = validParameter.validFile(parameters, "accnos", true);
130                         if (accnosfile == "not open") { abort = true; }
131                         else if (accnosfile == "not found") {  accnosfile = ""; }       
132                         
133                         groupfile = validParameter.validFile(parameters, "group", true);
134                         if (groupfile == "not open") { abort = true; }
135                         else if (groupfile == "not found") {  groupfile = "";  m->mothurOut("You must provide a group file."); m->mothurOutEndLine(); abort = true; }   
136                         
137                         listfile = validParameter.validFile(parameters, "list", true);
138                         if (listfile == "not open") { abort = true; }
139                         else if (listfile == "not found") {  listfile = ""; m->mothurOut("You must provide a list file."); m->mothurOutEndLine(); abort = true; }       
140                         
141                         groups = validParameter.validFile(parameters, "groups", false);                 
142                         if (groups == "not found") { groups = ""; }
143                         else { 
144                                 m->splitAtDash(groups, Groups);
145                         }
146                         
147                         label = validParameter.validFile(parameters, "label", false);                   
148                         if (label == "not found") { label = ""; m->mothurOut("You must provide a label to process."); m->mothurOutEndLine(); abort = true; }    
149                         
150                         if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; }
151                 }
152                 
153         }
154         catch(exception& e) {
155                 m->errorOut(e, "RemoveOtusCommand", "RemoveOtusCommand");
156                 exit(1);
157         }
158 }
159 //**********************************************************************************************************************
160
161 void RemoveOtusCommand::help(){
162         try {
163                 m->mothurOut("The remove.otus command removes otus containing sequences from a specfic group or set of groups.\n");
164                 m->mothurOut("It outputs a new list file containing the otus containing sequences NOT from in the those specified groups.\n");
165                 m->mothurOut("The remove.otus command parameters are accnos, group, list, label and groups. The group, list and label parameters are required.\n");
166                 m->mothurOut("You must also provide an accnos containing the list of groups to get or set the groups parameter to the groups you wish to select.\n");
167                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like.  You can separate group names with dashes.\n");
168                 m->mothurOut("The label parameter allows you to specify which distance you want to process.\n");
169                 m->mothurOut("The remove.otus command should be in the following format: remove.otus(accnos=yourAccnos, list=yourListFile, group=yourGroupFile, label=yourLabel).\n");
170                 m->mothurOut("Example remove.otus(accnos=amazon.accnos, list=amazon.fn.list, group=amazon.groups, label=0.03).\n");
171                 m->mothurOut("or remove.otus(groups=pasture, list=amazon.fn.list, amazon.groups, label=0.03).\n");
172                 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n\n");
173         }
174         catch(exception& e) {
175                 m->errorOut(e, "RemoveOtusCommand", "help");
176                 exit(1);
177         }
178 }
179
180 //**********************************************************************************************************************
181
182 int RemoveOtusCommand::execute(){
183         try {
184                 
185                 if (abort == true) { return 0; }
186                 
187                 groupMap = new GroupMap(groupfile);
188                 groupMap->readMap();
189                 
190                 //get groups you want to remove
191                 if (accnosfile != "") { readAccnos(); }
192                 
193                 //make sure groups are valid
194                 //takes care of user setting groupNames that are invalid or setting groups=all
195                 SharedUtil* util = new SharedUtil();
196                 util->setGroups(Groups, groupMap->namesOfGroups);
197                 delete util;
198                 
199                 if (m->control_pressed) { delete groupMap; return 0; }
200                 
201                 //read through the list file keeping any otus that contain any sequence from the groups selected
202                 readListGroup();
203                 
204                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
205                 
206                 if (outputNames.size() != 0) {
207                         m->mothurOutEndLine();
208                         m->mothurOut("Output File names: "); m->mothurOutEndLine();
209                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
210                         m->mothurOutEndLine();
211                 }
212                 
213                 return 0;               
214         }
215         
216         catch(exception& e) {
217                 m->errorOut(e, "RemoveOtusCommand", "execute");
218                 exit(1);
219         }
220 }
221 //**********************************************************************************************************************
222 int RemoveOtusCommand::readListGroup(){
223         try {
224                 string thisOutputDir = outputDir;
225                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
226                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick." + label +  m->getExtension(listfile);
227                 
228                 ofstream out;
229                 m->openOutputFile(outputFileName, out);
230                 
231                 string GroupOutputDir = outputDir;
232                 if (outputDir == "") {  GroupOutputDir += m->hasPath(groupfile);  }
233                 string outputGroupFileName = GroupOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick." + label  + m->getExtension(groupfile);
234                 
235                 ofstream outGroup;
236                 m->openOutputFile(outputGroupFileName, outGroup);
237                 
238                 InputData* input = new InputData(listfile, "list");
239                 ListVector* list = input->getListVector();
240                 string lastLabel = list->getLabel();
241                 
242                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
243                 set<string> labels; labels.insert(label);
244                 set<string> processedLabels;
245                 set<string> userLabels = labels;
246                 
247                 bool wroteSomething = false;
248                 
249                 //as long as you are not at the end of the file or done wih the lines you want
250                 while((list != NULL) && (userLabels.size() != 0)) {
251                         
252                         if (m->control_pressed) {  delete list; delete input; out.close();  outGroup.close(); remove(outputFileName.c_str());  remove(outputGroupFileName.c_str());return 0;  }
253                         
254                         if(labels.count(list->getLabel()) == 1){
255                                 processList(list, groupMap, out, outGroup, wroteSomething);
256                                 
257                                 processedLabels.insert(list->getLabel());
258                                 userLabels.erase(list->getLabel());
259                         }
260                         
261                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
262                                 string saveLabel = list->getLabel();
263                                 
264                                 delete list; 
265                                 
266                                 list = input->getListVector(lastLabel);
267                                 
268                                 processList(list, groupMap, out, outGroup, wroteSomething);
269                                 
270                                 processedLabels.insert(list->getLabel());
271                                 userLabels.erase(list->getLabel());
272                                 
273                                 //restore real lastlabel to save below
274                                 list->setLabel(saveLabel);
275                         }
276                         
277                         lastLabel = list->getLabel();
278                         
279                         delete list; list = NULL;
280                         
281                         //get next line to process
282                         list = input->getListVector();                          
283                 }
284                 
285                 
286                 if (m->control_pressed) {  if (list != NULL) { delete list; } delete input; out.close(); outGroup.close(); remove(outputFileName.c_str());  remove(outputGroupFileName.c_str()); return 0;  }
287                 
288                 //output error messages about any remaining user labels
289                 set<string>::iterator it;
290                 bool needToRun = false;
291                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
292                         m->mothurOut("Your file does not include the label " + *it); 
293                         if (processedLabels.count(lastLabel) != 1) {
294                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
295                                 needToRun = true;
296                         }else {
297                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
298                         }
299                 }
300                 
301                 //run last label if you need to
302                 if (needToRun == true)  {
303                         if (list != NULL) { delete list; }
304                         
305                         list = input->getListVector(lastLabel);
306                         
307                         processList(list, groupMap, out, outGroup, wroteSomething);
308                         
309                         delete list; list = NULL;
310                 }
311                 
312                 out.close();
313                 outGroup.close();
314                 
315                 if (wroteSomething == false) {  m->mothurOut("At distance " + label + " your file ONLY contains otus containing sequences from the groups you wish to remove."); m->mothurOutEndLine();  }
316                 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
317                 outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName);
318                 
319                 return 0;
320                 
321         }
322         catch(exception& e) {
323                 m->errorOut(e, "RemoveOtusCommand", "readList");
324                 exit(1);
325         }
326 }
327 //**********************************************************************************************************************
328 int RemoveOtusCommand::processList(ListVector*& list, GroupMap*& groupMap, ofstream& out, ofstream& outGroup, bool& wroteSomething){
329         try {
330                 
331                 //make a new list vector
332                 ListVector newList;
333                 newList.setLabel(list->getLabel());
334                 
335                 int numOtus = 0;
336                 //for each bin
337                 for (int i = 0; i < list->getNumBins(); i++) {
338                         if (m->control_pressed) { return 0; }
339                         
340                         //parse out names that are in accnos file
341                         string binnames = list->get(i);
342                         
343                         bool removeBin = false;
344                         string groupFileOutput = "";
345                         
346                         //parse names
347                         string individual = "";
348                         int length = binnames.length();
349                         for(int j=0;j<length;j++){
350                                 if(binnames[j] == ','){
351                                         string group = groupMap->getGroup(individual);
352                                         if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
353                                         
354                                         if (m->inUsersGroups(group, Groups)) {  removeBin = true; break; }
355                                         groupFileOutput += individual + "\t" + group + "\n";
356                                         individual = "";        
357                                         
358                                 }
359                                 else{  individual += binnames[j];  }
360                         }
361                         
362                         if (!removeBin) { 
363                                 //get last name
364                                 string group = groupMap->getGroup(individual);
365                                 if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
366                                 
367                                 if (m->inUsersGroups(group, Groups)) {  removeBin = true; }
368                                 groupFileOutput += individual + "\t" + group + "\n";                            
369                                 
370                                 if (!removeBin) {
371                                         //if there are no sequences from the groups we want to remove in this bin add to new list, output to groupfile
372                                         newList.push_back(binnames);    
373                                         outGroup << groupFileOutput;
374                                 }else {
375                                         numOtus++;
376                                 }
377                         }else {
378                                 numOtus++;
379                         }
380                         
381                 }
382                 
383                 //print new listvector
384                 if (newList.getNumBins() != 0) {
385                         wroteSomething = true;
386                         newList.print(out);
387                 }
388                 
389                 m->mothurOut(newList.getLabel() + " - removed " + toString(numOtus) + " of the " + toString(list->getNumBins()) + " OTUs."); m->mothurOutEndLine();
390                 
391                 return 0;
392                 
393         }
394         catch(exception& e) {
395                 m->errorOut(e, "RemoveOtusCommand", "processList");
396                 exit(1);
397         }
398 }
399 //**********************************************************************************************************************
400 void RemoveOtusCommand::readAccnos(){
401         try {
402                 Groups.clear();
403                 
404                 ifstream in;
405                 m->openInputFile(accnosfile, in);
406                 string name;
407                 
408                 while(!in.eof()){
409                         in >> name;
410                         
411                         Groups.push_back(name);
412                         
413                         m->gobble(in);
414                 }
415                 in.close();             
416                 
417         }
418         catch(exception& e) {
419                 m->errorOut(e, "RemoveOtusCommand", "readAccnos");
420                 exit(1);
421         }
422 }
423 //**********************************************************************************************************************
424
425
426