]> git.donarmstrong.com Git - mothur.git/blob - removeotuscommand.cpp
added set.current and get.current commands and modified existing commands to update...
[mothur.git] / removeotuscommand.cpp
1 /*
2  *  removeotuscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "removeotuscommand.h"
11 #include "inputdata.h"
12 #include "sharedutilities.h"
13
14
15 //**********************************************************************************************************************
16 vector<string> RemoveOtusCommand::getValidParameters(){ 
17         try {
18                 string Array[] =  { "group", "accnos","label", "groups","list","outputdir","inputdir" };
19                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
20                 return myArray;
21         }
22         catch(exception& e) {
23                 m->errorOut(e, "RemoveOtusCommand", "getValidParameters");
24                 exit(1);
25         }
26 }
27 //**********************************************************************************************************************
28 RemoveOtusCommand::RemoveOtusCommand(){ 
29         try {
30                 abort = true; calledHelp = true; 
31                 vector<string> tempOutNames;
32                 outputTypes["group"] = tempOutNames;
33                 outputTypes["list"] = tempOutNames;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "RemoveOtusCommand", "RemoveOtusCommand");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 vector<string> RemoveOtusCommand::getRequiredParameters(){      
42         try {
43                 string Array[] =  {"group","label", "list"};
44                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "RemoveOtusCommand", "getRequiredParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 vector<string> RemoveOtusCommand::getRequiredFiles(){   
54         try {
55                 vector<string> myArray;
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "RemoveOtusCommand", "getRequiredFiles");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64 RemoveOtusCommand::RemoveOtusCommand(string option)  {
65         try {
66                 abort = false; calledHelp = false;   
67                 
68                 //allow user to run help
69                 if(option == "help") { help(); abort = true; calledHelp = true; }
70                 
71                 else {
72                         //valid paramters for this command
73                         string Array[] =  { "group", "accnos","label", "groups", "list","outputdir","inputdir" };
74                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
75                         
76                         OptionParser parser(option);
77                         map<string,string> parameters = parser.getParameters();
78                         
79                         ValidParameters validParameter;
80                         map<string,string>::iterator it;
81                         
82                         //check to make sure all parameters are valid for command
83                         for (it = parameters.begin(); it != parameters.end(); it++) { 
84                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
85                         }
86                         
87                         //initialize outputTypes
88                         vector<string> tempOutNames;
89                         outputTypes["group"] = tempOutNames;
90                         outputTypes["list"] = tempOutNames;
91                         
92                         
93                         //if the user changes the output directory command factory will send this info to us in the output parameter 
94                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
95                         
96                         //if the user changes the input directory command factory will send this info to us in the output parameter 
97                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
98                         if (inputDir == "not found"){   inputDir = "";          }
99                         else {
100                                 string path;
101                                 it = parameters.find("accnos");
102                                 //user has given a template file
103                                 if(it != parameters.end()){ 
104                                         path = m->hasPath(it->second);
105                                         //if the user has not given a path then, add inputdir. else leave path alone.
106                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
107                                 }
108                                 
109                                 it = parameters.find("list");
110                                 //user has given a template file
111                                 if(it != parameters.end()){ 
112                                         path = m->hasPath(it->second);
113                                         //if the user has not given a path then, add inputdir. else leave path alone.
114                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
115                                 }
116                                 
117                                 it = parameters.find("group");
118                                 //user has given a template file
119                                 if(it != parameters.end()){ 
120                                         path = m->hasPath(it->second);
121                                         //if the user has not given a path then, add inputdir. else leave path alone.
122                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
123                                 }
124                         }
125                         
126                         
127                         //check for required parameters
128                         accnosfile = validParameter.validFile(parameters, "accnos", true);
129                         if (accnosfile == "not open") { abort = true; }
130                         else if (accnosfile == "not found") {  accnosfile = ""; }       
131                         
132                         groupfile = validParameter.validFile(parameters, "group", true);
133                         if (groupfile == "not open") { abort = true; }
134                         else if (groupfile == "not found") {  groupfile = "";  m->mothurOut("You must provide a group file."); m->mothurOutEndLine(); abort = true; }   
135                         
136                         listfile = validParameter.validFile(parameters, "list", true);
137                         if (listfile == "not open") { abort = true; }
138                         else if (listfile == "not found") {  listfile = ""; m->mothurOut("You must provide a list file."); m->mothurOutEndLine(); abort = true; }       
139                         
140                         groups = validParameter.validFile(parameters, "groups", false);                 
141                         if (groups == "not found") { groups = ""; }
142                         else { 
143                                 m->splitAtDash(groups, Groups);
144                         }
145                         
146                         label = validParameter.validFile(parameters, "label", false);                   
147                         if (label == "not found") { label = ""; m->mothurOut("You must provide a label to process."); m->mothurOutEndLine(); abort = true; }    
148                         
149                         if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; }
150                 }
151                 
152         }
153         catch(exception& e) {
154                 m->errorOut(e, "RemoveOtusCommand", "RemoveOtusCommand");
155                 exit(1);
156         }
157 }
158 //**********************************************************************************************************************
159
160 void RemoveOtusCommand::help(){
161         try {
162                 m->mothurOut("The remove.otus command removes otus containing sequences from a specfic group or set of groups.\n");
163                 m->mothurOut("It outputs a new list file containing the otus containing sequences NOT from in the those specified groups.\n");
164                 m->mothurOut("The remove.otus command parameters are accnos, group, list, label and groups. The group, list and label parameters are required.\n");
165                 m->mothurOut("You must also provide an accnos containing the list of groups to get or set the groups parameter to the groups you wish to select.\n");
166                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like.  You can separate group names with dashes.\n");
167                 m->mothurOut("The label parameter allows you to specify which distance you want to process.\n");
168                 m->mothurOut("The remove.otus command should be in the following format: remove.otus(accnos=yourAccnos, list=yourListFile, group=yourGroupFile, label=yourLabel).\n");
169                 m->mothurOut("Example remove.otus(accnos=amazon.accnos, list=amazon.fn.list, group=amazon.groups, label=0.03).\n");
170                 m->mothurOut("or remove.otus(groups=pasture, list=amazon.fn.list, amazon.groups, label=0.03).\n");
171                 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n\n");
172         }
173         catch(exception& e) {
174                 m->errorOut(e, "RemoveOtusCommand", "help");
175                 exit(1);
176         }
177 }
178
179 //**********************************************************************************************************************
180
181 int RemoveOtusCommand::execute(){
182         try {
183                 
184                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
185                 
186                 groupMap = new GroupMap(groupfile);
187                 groupMap->readMap();
188                 
189                 //get groups you want to remove
190                 if (accnosfile != "") { readAccnos(); }
191                 
192                 //make sure groups are valid
193                 //takes care of user setting groupNames that are invalid or setting groups=all
194                 SharedUtil* util = new SharedUtil();
195                 util->setGroups(Groups, groupMap->namesOfGroups);
196                 delete util;
197                 
198                 if (m->control_pressed) { delete groupMap; return 0; }
199                 
200                 //read through the list file keeping any otus that contain any sequence from the groups selected
201                 readListGroup();
202                 
203                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
204                 
205                 if (outputNames.size() != 0) {
206                         m->mothurOutEndLine();
207                         m->mothurOut("Output File names: "); m->mothurOutEndLine();
208                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
209                         m->mothurOutEndLine();
210                         
211                         //set fasta file as new current fastafile
212                         string current = "";
213                         itTypes = outputTypes.find("group");
214                         if (itTypes != outputTypes.end()) {
215                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
216                         }
217                         
218                         itTypes = outputTypes.find("list");
219                         if (itTypes != outputTypes.end()) {
220                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
221                         }
222                 }
223                 
224                 return 0;               
225         }
226         
227         catch(exception& e) {
228                 m->errorOut(e, "RemoveOtusCommand", "execute");
229                 exit(1);
230         }
231 }
232 //**********************************************************************************************************************
233 int RemoveOtusCommand::readListGroup(){
234         try {
235                 string thisOutputDir = outputDir;
236                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
237                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick." + label +  m->getExtension(listfile);
238                 
239                 ofstream out;
240                 m->openOutputFile(outputFileName, out);
241                 
242                 string GroupOutputDir = outputDir;
243                 if (outputDir == "") {  GroupOutputDir += m->hasPath(groupfile);  }
244                 string outputGroupFileName = GroupOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick." + label  + m->getExtension(groupfile);
245                 
246                 ofstream outGroup;
247                 m->openOutputFile(outputGroupFileName, outGroup);
248                 
249                 InputData* input = new InputData(listfile, "list");
250                 ListVector* list = input->getListVector();
251                 string lastLabel = list->getLabel();
252                 
253                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
254                 set<string> labels; labels.insert(label);
255                 set<string> processedLabels;
256                 set<string> userLabels = labels;
257                 
258                 bool wroteSomething = false;
259                 
260                 //as long as you are not at the end of the file or done wih the lines you want
261                 while((list != NULL) && (userLabels.size() != 0)) {
262                         
263                         if (m->control_pressed) {  delete list; delete input; out.close();  outGroup.close(); remove(outputFileName.c_str());  remove(outputGroupFileName.c_str());return 0;  }
264                         
265                         if(labels.count(list->getLabel()) == 1){
266                                 processList(list, groupMap, out, outGroup, wroteSomething);
267                                 
268                                 processedLabels.insert(list->getLabel());
269                                 userLabels.erase(list->getLabel());
270                         }
271                         
272                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
273                                 string saveLabel = list->getLabel();
274                                 
275                                 delete list; 
276                                 
277                                 list = input->getListVector(lastLabel);
278                                 
279                                 processList(list, groupMap, out, outGroup, wroteSomething);
280                                 
281                                 processedLabels.insert(list->getLabel());
282                                 userLabels.erase(list->getLabel());
283                                 
284                                 //restore real lastlabel to save below
285                                 list->setLabel(saveLabel);
286                         }
287                         
288                         lastLabel = list->getLabel();
289                         
290                         delete list; list = NULL;
291                         
292                         //get next line to process
293                         list = input->getListVector();                          
294                 }
295                 
296                 
297                 if (m->control_pressed) {  if (list != NULL) { delete list; } delete input; out.close(); outGroup.close(); remove(outputFileName.c_str());  remove(outputGroupFileName.c_str()); return 0;  }
298                 
299                 //output error messages about any remaining user labels
300                 set<string>::iterator it;
301                 bool needToRun = false;
302                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
303                         m->mothurOut("Your file does not include the label " + *it); 
304                         if (processedLabels.count(lastLabel) != 1) {
305                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
306                                 needToRun = true;
307                         }else {
308                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
309                         }
310                 }
311                 
312                 //run last label if you need to
313                 if (needToRun == true)  {
314                         if (list != NULL) { delete list; }
315                         
316                         list = input->getListVector(lastLabel);
317                         
318                         processList(list, groupMap, out, outGroup, wroteSomething);
319                         
320                         delete list; list = NULL;
321                 }
322                 
323                 out.close();
324                 outGroup.close();
325                 
326                 if (wroteSomething == false) {  m->mothurOut("At distance " + label + " your file ONLY contains otus containing sequences from the groups you wish to remove."); m->mothurOutEndLine();  }
327                 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
328                 outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName);
329                 
330                 return 0;
331                 
332         }
333         catch(exception& e) {
334                 m->errorOut(e, "RemoveOtusCommand", "readList");
335                 exit(1);
336         }
337 }
338 //**********************************************************************************************************************
339 int RemoveOtusCommand::processList(ListVector*& list, GroupMap*& groupMap, ofstream& out, ofstream& outGroup, bool& wroteSomething){
340         try {
341                 
342                 //make a new list vector
343                 ListVector newList;
344                 newList.setLabel(list->getLabel());
345                 
346                 int numOtus = 0;
347                 //for each bin
348                 for (int i = 0; i < list->getNumBins(); i++) {
349                         if (m->control_pressed) { return 0; }
350                         
351                         //parse out names that are in accnos file
352                         string binnames = list->get(i);
353                         
354                         bool removeBin = false;
355                         string groupFileOutput = "";
356                         
357                         //parse names
358                         string individual = "";
359                         int length = binnames.length();
360                         for(int j=0;j<length;j++){
361                                 if(binnames[j] == ','){
362                                         string group = groupMap->getGroup(individual);
363                                         if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
364                                         
365                                         if (m->inUsersGroups(group, Groups)) {  removeBin = true; break; }
366                                         groupFileOutput += individual + "\t" + group + "\n";
367                                         individual = "";        
368                                         
369                                 }
370                                 else{  individual += binnames[j];  }
371                         }
372                         
373                         if (!removeBin) { 
374                                 //get last name
375                                 string group = groupMap->getGroup(individual);
376                                 if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
377                                 
378                                 if (m->inUsersGroups(group, Groups)) {  removeBin = true; }
379                                 groupFileOutput += individual + "\t" + group + "\n";                            
380                                 
381                                 if (!removeBin) {
382                                         //if there are no sequences from the groups we want to remove in this bin add to new list, output to groupfile
383                                         newList.push_back(binnames);    
384                                         outGroup << groupFileOutput;
385                                 }else {
386                                         numOtus++;
387                                 }
388                         }else {
389                                 numOtus++;
390                         }
391                         
392                 }
393                 
394                 //print new listvector
395                 if (newList.getNumBins() != 0) {
396                         wroteSomething = true;
397                         newList.print(out);
398                 }
399                 
400                 m->mothurOut(newList.getLabel() + " - removed " + toString(numOtus) + " of the " + toString(list->getNumBins()) + " OTUs."); m->mothurOutEndLine();
401                 
402                 return 0;
403                 
404         }
405         catch(exception& e) {
406                 m->errorOut(e, "RemoveOtusCommand", "processList");
407                 exit(1);
408         }
409 }
410 //**********************************************************************************************************************
411 void RemoveOtusCommand::readAccnos(){
412         try {
413                 Groups.clear();
414                 
415                 ifstream in;
416                 m->openInputFile(accnosfile, in);
417                 string name;
418                 
419                 while(!in.eof()){
420                         in >> name;
421                         
422                         Groups.push_back(name);
423                         
424                         m->gobble(in);
425                 }
426                 in.close();             
427                 
428         }
429         catch(exception& e) {
430                 m->errorOut(e, "RemoveOtusCommand", "readAccnos");
431                 exit(1);
432         }
433 }
434 //**********************************************************************************************************************
435
436
437