]> git.donarmstrong.com Git - mothur.git/blob - removeotuscommand.cpp
mods to seq.errror
[mothur.git] / removeotuscommand.cpp
1 /*
2  *  removeotuscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "removeotuscommand.h"
11 #include "inputdata.h"
12 #include "sharedutilities.h"
13
14
15 //**********************************************************************************************************************
16 vector<string> RemoveOtusCommand::getValidParameters(){ 
17         try {
18                 string Array[] =  { "group", "accnos","label", "groups","list","outputdir","inputdir" };
19                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
20                 return myArray;
21         }
22         catch(exception& e) {
23                 m->errorOut(e, "RemoveOtusCommand", "getValidParameters");
24                 exit(1);
25         }
26 }
27 //**********************************************************************************************************************
28 RemoveOtusCommand::RemoveOtusCommand(){ 
29         try {
30                 abort = true; calledHelp = true; 
31                 vector<string> tempOutNames;
32                 outputTypes["group"] = tempOutNames;
33                 outputTypes["list"] = tempOutNames;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "RemoveOtusCommand", "RemoveOtusCommand");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 vector<string> RemoveOtusCommand::getRequiredParameters(){      
42         try {
43                 string Array[] =  {"group","label", "list"};
44                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "RemoveOtusCommand", "getRequiredParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 vector<string> RemoveOtusCommand::getRequiredFiles(){   
54         try {
55                 vector<string> myArray;
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "RemoveOtusCommand", "getRequiredFiles");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64 RemoveOtusCommand::RemoveOtusCommand(string option)  {
65         try {
66                 abort = false; calledHelp = false;   
67                 
68                 //allow user to run help
69                 if(option == "help") { help(); abort = true; calledHelp = true; }
70                 
71                 else {
72                         //valid paramters for this command
73                         string Array[] =  { "group", "accnos","label", "groups", "list","outputdir","inputdir" };
74                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
75                         
76                         OptionParser parser(option);
77                         map<string,string> parameters = parser.getParameters();
78                         
79                         ValidParameters validParameter;
80                         map<string,string>::iterator it;
81                         
82                         //check to make sure all parameters are valid for command
83                         for (it = parameters.begin(); it != parameters.end(); it++) { 
84                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
85                         }
86                         
87                         //initialize outputTypes
88                         vector<string> tempOutNames;
89                         outputTypes["group"] = tempOutNames;
90                         outputTypes["list"] = tempOutNames;
91                         
92                         
93                         //if the user changes the output directory command factory will send this info to us in the output parameter 
94                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
95                         
96                         //if the user changes the input directory command factory will send this info to us in the output parameter 
97                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
98                         if (inputDir == "not found"){   inputDir = "";          }
99                         else {
100                                 string path;
101                                 it = parameters.find("accnos");
102                                 //user has given a template file
103                                 if(it != parameters.end()){ 
104                                         path = m->hasPath(it->second);
105                                         //if the user has not given a path then, add inputdir. else leave path alone.
106                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
107                                 }
108                                 
109                                 it = parameters.find("list");
110                                 //user has given a template file
111                                 if(it != parameters.end()){ 
112                                         path = m->hasPath(it->second);
113                                         //if the user has not given a path then, add inputdir. else leave path alone.
114                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
115                                 }
116                                 
117                                 it = parameters.find("group");
118                                 //user has given a template file
119                                 if(it != parameters.end()){ 
120                                         path = m->hasPath(it->second);
121                                         //if the user has not given a path then, add inputdir. else leave path alone.
122                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
123                                 }
124                         }
125                         
126                         
127                         //check for required parameters
128                         accnosfile = validParameter.validFile(parameters, "accnos", true);
129                         if (accnosfile == "not open") { abort = true; }
130                         else if (accnosfile == "not found") {  accnosfile = ""; }       
131                         
132                         groupfile = validParameter.validFile(parameters, "group", true);
133                         if (groupfile == "not open") { abort = true; }
134                         else if (groupfile == "not found") {  groupfile = "";  m->mothurOut("You must provide a group file."); m->mothurOutEndLine(); abort = true; }   
135                         
136                         listfile = validParameter.validFile(parameters, "list", true);
137                         if (listfile == "not open") { abort = true; }
138                         else if (listfile == "not found") {  listfile = ""; m->mothurOut("You must provide a list file."); m->mothurOutEndLine(); abort = true; }       
139                         
140                         groups = validParameter.validFile(parameters, "groups", false);                 
141                         if (groups == "not found") { groups = ""; }
142                         else { 
143                                 m->splitAtDash(groups, Groups);
144                         }
145                         
146                         label = validParameter.validFile(parameters, "label", false);                   
147                         if (label == "not found") { label = ""; m->mothurOut("You must provide a label to process."); m->mothurOutEndLine(); abort = true; }    
148                         
149                         if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; }
150                 }
151                 
152         }
153         catch(exception& e) {
154                 m->errorOut(e, "RemoveOtusCommand", "RemoveOtusCommand");
155                 exit(1);
156         }
157 }
158 //**********************************************************************************************************************
159
160 void RemoveOtusCommand::help(){
161         try {
162                 m->mothurOut("The remove.otus command removes otus containing sequences from a specfic group or set of groups.\n");
163                 m->mothurOut("It outputs a new list file containing the otus containing sequences NOT from in the those specified groups.\n");
164                 m->mothurOut("The remove.otus command parameters are accnos, group, list, label and groups. The group, list and label parameters are required.\n");
165                 m->mothurOut("You must also provide an accnos containing the list of groups to get or set the groups parameter to the groups you wish to select.\n");
166                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like.  You can separate group names with dashes.\n");
167                 m->mothurOut("The label parameter allows you to specify which distance you want to process.\n");
168                 m->mothurOut("The remove.otus command should be in the following format: remove.otus(accnos=yourAccnos, list=yourListFile, group=yourGroupFile, label=yourLabel).\n");
169                 m->mothurOut("Example remove.otus(accnos=amazon.accnos, list=amazon.fn.list, group=amazon.groups, label=0.03).\n");
170                 m->mothurOut("or remove.otus(groups=pasture, list=amazon.fn.list, amazon.groups, label=0.03).\n");
171                 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n\n");
172         }
173         catch(exception& e) {
174                 m->errorOut(e, "RemoveOtusCommand", "help");
175                 exit(1);
176         }
177 }
178
179 //**********************************************************************************************************************
180
181 int RemoveOtusCommand::execute(){
182         try {
183                 
184                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
185                 
186                 groupMap = new GroupMap(groupfile);
187                 groupMap->readMap();
188                 
189                 //get groups you want to remove
190                 if (accnosfile != "") { readAccnos(); }
191                 
192                 //make sure groups are valid
193                 //takes care of user setting groupNames that are invalid or setting groups=all
194                 SharedUtil* util = new SharedUtil();
195                 util->setGroups(Groups, groupMap->namesOfGroups);
196                 delete util;
197                 
198                 if (m->control_pressed) { delete groupMap; return 0; }
199                 
200                 //read through the list file keeping any otus that contain any sequence from the groups selected
201                 readListGroup();
202                 
203                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
204                 
205                 if (outputNames.size() != 0) {
206                         m->mothurOutEndLine();
207                         m->mothurOut("Output File names: "); m->mothurOutEndLine();
208                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
209                         m->mothurOutEndLine();
210                 }
211                 
212                 return 0;               
213         }
214         
215         catch(exception& e) {
216                 m->errorOut(e, "RemoveOtusCommand", "execute");
217                 exit(1);
218         }
219 }
220 //**********************************************************************************************************************
221 int RemoveOtusCommand::readListGroup(){
222         try {
223                 string thisOutputDir = outputDir;
224                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
225                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick." + label +  m->getExtension(listfile);
226                 
227                 ofstream out;
228                 m->openOutputFile(outputFileName, out);
229                 
230                 string GroupOutputDir = outputDir;
231                 if (outputDir == "") {  GroupOutputDir += m->hasPath(groupfile);  }
232                 string outputGroupFileName = GroupOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick." + label  + m->getExtension(groupfile);
233                 
234                 ofstream outGroup;
235                 m->openOutputFile(outputGroupFileName, outGroup);
236                 
237                 InputData* input = new InputData(listfile, "list");
238                 ListVector* list = input->getListVector();
239                 string lastLabel = list->getLabel();
240                 
241                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
242                 set<string> labels; labels.insert(label);
243                 set<string> processedLabels;
244                 set<string> userLabels = labels;
245                 
246                 bool wroteSomething = false;
247                 
248                 //as long as you are not at the end of the file or done wih the lines you want
249                 while((list != NULL) && (userLabels.size() != 0)) {
250                         
251                         if (m->control_pressed) {  delete list; delete input; out.close();  outGroup.close(); remove(outputFileName.c_str());  remove(outputGroupFileName.c_str());return 0;  }
252                         
253                         if(labels.count(list->getLabel()) == 1){
254                                 processList(list, groupMap, out, outGroup, wroteSomething);
255                                 
256                                 processedLabels.insert(list->getLabel());
257                                 userLabels.erase(list->getLabel());
258                         }
259                         
260                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
261                                 string saveLabel = list->getLabel();
262                                 
263                                 delete list; 
264                                 
265                                 list = input->getListVector(lastLabel);
266                                 
267                                 processList(list, groupMap, out, outGroup, wroteSomething);
268                                 
269                                 processedLabels.insert(list->getLabel());
270                                 userLabels.erase(list->getLabel());
271                                 
272                                 //restore real lastlabel to save below
273                                 list->setLabel(saveLabel);
274                         }
275                         
276                         lastLabel = list->getLabel();
277                         
278                         delete list; list = NULL;
279                         
280                         //get next line to process
281                         list = input->getListVector();                          
282                 }
283                 
284                 
285                 if (m->control_pressed) {  if (list != NULL) { delete list; } delete input; out.close(); outGroup.close(); remove(outputFileName.c_str());  remove(outputGroupFileName.c_str()); return 0;  }
286                 
287                 //output error messages about any remaining user labels
288                 set<string>::iterator it;
289                 bool needToRun = false;
290                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
291                         m->mothurOut("Your file does not include the label " + *it); 
292                         if (processedLabels.count(lastLabel) != 1) {
293                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
294                                 needToRun = true;
295                         }else {
296                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
297                         }
298                 }
299                 
300                 //run last label if you need to
301                 if (needToRun == true)  {
302                         if (list != NULL) { delete list; }
303                         
304                         list = input->getListVector(lastLabel);
305                         
306                         processList(list, groupMap, out, outGroup, wroteSomething);
307                         
308                         delete list; list = NULL;
309                 }
310                 
311                 out.close();
312                 outGroup.close();
313                 
314                 if (wroteSomething == false) {  m->mothurOut("At distance " + label + " your file ONLY contains otus containing sequences from the groups you wish to remove."); m->mothurOutEndLine();  }
315                 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
316                 outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName);
317                 
318                 return 0;
319                 
320         }
321         catch(exception& e) {
322                 m->errorOut(e, "RemoveOtusCommand", "readList");
323                 exit(1);
324         }
325 }
326 //**********************************************************************************************************************
327 int RemoveOtusCommand::processList(ListVector*& list, GroupMap*& groupMap, ofstream& out, ofstream& outGroup, bool& wroteSomething){
328         try {
329                 
330                 //make a new list vector
331                 ListVector newList;
332                 newList.setLabel(list->getLabel());
333                 
334                 int numOtus = 0;
335                 //for each bin
336                 for (int i = 0; i < list->getNumBins(); i++) {
337                         if (m->control_pressed) { return 0; }
338                         
339                         //parse out names that are in accnos file
340                         string binnames = list->get(i);
341                         
342                         bool removeBin = false;
343                         string groupFileOutput = "";
344                         
345                         //parse names
346                         string individual = "";
347                         int length = binnames.length();
348                         for(int j=0;j<length;j++){
349                                 if(binnames[j] == ','){
350                                         string group = groupMap->getGroup(individual);
351                                         if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
352                                         
353                                         if (m->inUsersGroups(group, Groups)) {  removeBin = true; break; }
354                                         groupFileOutput += individual + "\t" + group + "\n";
355                                         individual = "";        
356                                         
357                                 }
358                                 else{  individual += binnames[j];  }
359                         }
360                         
361                         if (!removeBin) { 
362                                 //get last name
363                                 string group = groupMap->getGroup(individual);
364                                 if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
365                                 
366                                 if (m->inUsersGroups(group, Groups)) {  removeBin = true; }
367                                 groupFileOutput += individual + "\t" + group + "\n";                            
368                                 
369                                 if (!removeBin) {
370                                         //if there are no sequences from the groups we want to remove in this bin add to new list, output to groupfile
371                                         newList.push_back(binnames);    
372                                         outGroup << groupFileOutput;
373                                 }else {
374                                         numOtus++;
375                                 }
376                         }else {
377                                 numOtus++;
378                         }
379                         
380                 }
381                 
382                 //print new listvector
383                 if (newList.getNumBins() != 0) {
384                         wroteSomething = true;
385                         newList.print(out);
386                 }
387                 
388                 m->mothurOut(newList.getLabel() + " - removed " + toString(numOtus) + " of the " + toString(list->getNumBins()) + " OTUs."); m->mothurOutEndLine();
389                 
390                 return 0;
391                 
392         }
393         catch(exception& e) {
394                 m->errorOut(e, "RemoveOtusCommand", "processList");
395                 exit(1);
396         }
397 }
398 //**********************************************************************************************************************
399 void RemoveOtusCommand::readAccnos(){
400         try {
401                 Groups.clear();
402                 
403                 ifstream in;
404                 m->openInputFile(accnosfile, in);
405                 string name;
406                 
407                 while(!in.eof()){
408                         in >> name;
409                         
410                         Groups.push_back(name);
411                         
412                         m->gobble(in);
413                 }
414                 in.close();             
415                 
416         }
417         catch(exception& e) {
418                 m->errorOut(e, "RemoveOtusCommand", "readAccnos");
419                 exit(1);
420         }
421 }
422 //**********************************************************************************************************************
423
424
425