]> git.donarmstrong.com Git - mothur.git/blob - removeotuscommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / removeotuscommand.cpp
1 /*
2  *  removeotuscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "removeotuscommand.h"
11 #include "inputdata.h"
12 #include "sharedutilities.h"
13
14 //**********************************************************************************************************************
15 vector<string> RemoveOtusCommand::setParameters(){      
16         try {
17                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pgroup);
18                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
19                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
20                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
21                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
22                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
23                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
24                 
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "RemoveOtusCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string RemoveOtusCommand::getHelpString(){      
36         try {
37                 string helpString = "";
38                 helpString += "The remove.otus command removes otus containing sequences from a specfic group or set of groups.\n";
39                 helpString += "It outputs a new list file containing the otus containing sequences NOT from in the those specified groups.\n";
40                 helpString += "The remove.otus command parameters are accnos, group, list, label and groups. The group and list parameters are required, unless you have valid current files.\n";
41                 helpString += "You must also provide an accnos containing the list of groups to get or set the groups parameter to the groups you wish to select.\n";
42                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like.  You can separate group names with dashes.\n";
43                 helpString += "The label parameter allows you to specify which distance you want to process.\n";
44                 helpString += "The remove.otus command should be in the following format: remove.otus(accnos=yourAccnos, list=yourListFile, group=yourGroupFile, label=yourLabel).\n";
45                 helpString += "Example remove.otus(accnos=amazon.accnos, list=amazon.fn.list, group=amazon.groups, label=0.03).\n";
46                 helpString += "or remove.otus(groups=pasture, list=amazon.fn.list, amazon.groups, label=0.03).\n";
47                 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n";
48                 return helpString;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "RemoveOtusCommand", "getHelpString");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string RemoveOtusCommand::getOutputFileNameTag(string type, string inputName=""){       
57         try {
58         string outputFileName = "";
59                 map<string, vector<string> >::iterator it;
60         
61         //is this a type this command creates
62         it = outputTypes.find(type);
63         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
64         else {
65             if (type == "group")            {   outputFileName = "pick" + m->getExtension(inputName);  }
66             else if (type == "list")        {   outputFileName = "pick" + m->getExtension(inputName);  }
67             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
68         }
69         return outputFileName;
70         }
71         catch(exception& e) {
72                 m->errorOut(e, "RemoveOtusCommand", "getOutputFileNameTag");
73                 exit(1);
74         }
75 }
76 ///**********************************************************************************************************************
77 RemoveOtusCommand::RemoveOtusCommand(){ 
78         try {
79                 abort = true; calledHelp = true; 
80                 setParameters();
81                 vector<string> tempOutNames;
82                 outputTypes["group"] = tempOutNames;
83                 outputTypes["list"] = tempOutNames;
84         }
85         catch(exception& e) {
86                 m->errorOut(e, "RemoveOtusCommand", "RemoveOtusCommand");
87                 exit(1);
88         }
89 }
90
91 //**********************************************************************************************************************
92 RemoveOtusCommand::RemoveOtusCommand(string option)  {
93         try {
94                 abort = false; calledHelp = false;   
95                 
96                 //allow user to run help
97                 if(option == "help") { help(); abort = true; calledHelp = true; }
98                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
99                 
100                 else {
101                         vector<string> myArray = setParameters();
102                         
103                         OptionParser parser(option);
104                         map<string,string> parameters = parser.getParameters();
105                         
106                         ValidParameters validParameter;
107                         map<string,string>::iterator it;
108                         
109                         //check to make sure all parameters are valid for command
110                         for (it = parameters.begin(); it != parameters.end(); it++) { 
111                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
112                         }
113                         
114                         //initialize outputTypes
115                         vector<string> tempOutNames;
116                         outputTypes["group"] = tempOutNames;
117                         outputTypes["list"] = tempOutNames;
118                         
119                         
120                         //if the user changes the output directory command factory will send this info to us in the output parameter 
121                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
122                         
123                         //if the user changes the input directory command factory will send this info to us in the output parameter 
124                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
125                         if (inputDir == "not found"){   inputDir = "";          }
126                         else {
127                                 string path;
128                                 it = parameters.find("accnos");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
134                                 }
135                                 
136                                 it = parameters.find("list");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
142                                 }
143                                 
144                                 it = parameters.find("group");
145                                 //user has given a template file
146                                 if(it != parameters.end()){ 
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
150                                 }
151                         }
152                         
153                         
154                         //check for required parameters
155                         accnosfile = validParameter.validFile(parameters, "accnos", true);
156                         if (accnosfile == "not open") { abort = true; }
157                         else if (accnosfile == "not found") {  accnosfile = ""; }
158                         else { m->setAccnosFile(accnosfile); }
159                         
160                         groupfile = validParameter.validFile(parameters, "group", true);
161                         if (groupfile == "not open") { abort = true; }
162                         else if (groupfile == "not found") {                            
163                                 groupfile = m->getGroupFile(); 
164                                 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
165                                 else {  m->mothurOut("You have no current group file and the group parameter is required."); m->mothurOutEndLine(); abort = true; }
166                         }else { m->setGroupFile(groupfile); }   
167                         
168                         listfile = validParameter.validFile(parameters, "list", true);
169                         if (listfile == "not open") { abort = true; }
170                         else if (listfile == "not found") {                             
171                                 listfile = m->getListFile(); 
172                                 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
173                                 else {  m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
174                         }else { m->setListFile(listfile); }     
175                         
176                         groups = validParameter.validFile(parameters, "groups", false);                 
177                         if (groups == "not found") { groups = ""; }
178                         else { 
179                                 m->splitAtDash(groups, Groups);
180                         }
181                         
182                         label = validParameter.validFile(parameters, "label", false);                   
183                         if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }  
184                         
185                         if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; }
186                 }
187                 
188         }
189         catch(exception& e) {
190                 m->errorOut(e, "RemoveOtusCommand", "RemoveOtusCommand");
191                 exit(1);
192         }
193 }
194 //**********************************************************************************************************************
195
196 int RemoveOtusCommand::execute(){
197         try {
198                 
199                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
200                 
201                 groupMap = new GroupMap(groupfile);
202                 groupMap->readMap();
203                 
204                 //get groups you want to remove
205                 if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups);  }
206                 
207                 //make sure groups are valid
208                 //takes care of user setting groupNames that are invalid or setting groups=all
209                 SharedUtil* util = new SharedUtil();
210                 vector<string> allGroups = groupMap->getNamesOfGroups();
211                 util->setGroups(Groups, allGroups);
212                 delete util;
213                 
214                 if (m->control_pressed) { delete groupMap; return 0; }
215                 
216                 //read through the list file keeping any otus that contain any sequence from the groups selected
217                 readListGroup();
218                 
219                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
220                 
221                 if (outputNames.size() != 0) {
222                         m->mothurOutEndLine();
223                         m->mothurOut("Output File names: "); m->mothurOutEndLine();
224                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
225                         m->mothurOutEndLine();
226                         
227                         //set fasta file as new current fastafile
228                         string current = "";
229                         itTypes = outputTypes.find("group");
230                         if (itTypes != outputTypes.end()) {
231                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
232                         }
233                         
234                         itTypes = outputTypes.find("list");
235                         if (itTypes != outputTypes.end()) {
236                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
237                         }
238                 }
239                 
240                 return 0;               
241         }
242         
243         catch(exception& e) {
244                 m->errorOut(e, "RemoveOtusCommand", "execute");
245                 exit(1);
246         }
247 }
248 //**********************************************************************************************************************
249 int RemoveOtusCommand::readListGroup(){
250         try {
251                 string thisOutputDir = outputDir;
252                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
253                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + label + "." + getOutputFileNameTag("list", listfile);
254                 
255                 ofstream out;
256                 m->openOutputFile(outputFileName, out);
257                 
258                 string GroupOutputDir = outputDir;
259                 if (outputDir == "") {  GroupOutputDir += m->hasPath(groupfile);  }
260                 string outputGroupFileName = GroupOutputDir + m->getRootName(m->getSimpleName(groupfile)) + label + "." + getOutputFileNameTag("group", groupfile);
261                 
262                 ofstream outGroup;
263                 m->openOutputFile(outputGroupFileName, outGroup);
264                 
265                 InputData* input = new InputData(listfile, "list");
266                 ListVector* list = input->getListVector();
267                 string lastLabel = list->getLabel();
268                 
269                 //using first label seen if none is provided
270                 if (label == "") { label = lastLabel; }
271                 
272                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
273                 set<string> labels; labels.insert(label);
274                 set<string> processedLabels;
275                 set<string> userLabels = labels;
276                 
277                 bool wroteSomething = false;
278                 
279                 //as long as you are not at the end of the file or done wih the lines you want
280                 while((list != NULL) && (userLabels.size() != 0)) {
281                         
282                         if (m->control_pressed) {  delete list; delete input; out.close();  outGroup.close(); m->mothurRemove(outputFileName);  m->mothurRemove(outputGroupFileName);return 0;  }
283                         
284                         if(labels.count(list->getLabel()) == 1){
285                                 processList(list, groupMap, out, outGroup, wroteSomething);
286                                 
287                                 processedLabels.insert(list->getLabel());
288                                 userLabels.erase(list->getLabel());
289                         }
290                         
291                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
292                                 string saveLabel = list->getLabel();
293                                 
294                                 delete list; 
295                                 
296                                 list = input->getListVector(lastLabel);
297                                 
298                                 processList(list, groupMap, out, outGroup, wroteSomething);
299                                 
300                                 processedLabels.insert(list->getLabel());
301                                 userLabels.erase(list->getLabel());
302                                 
303                                 //restore real lastlabel to save below
304                                 list->setLabel(saveLabel);
305                         }
306                         
307                         lastLabel = list->getLabel();
308                         
309                         delete list; list = NULL;
310                         
311                         //get next line to process
312                         list = input->getListVector();                          
313                 }
314                 
315                 
316                 if (m->control_pressed) {  if (list != NULL) { delete list; } delete input; out.close(); outGroup.close(); m->mothurRemove(outputFileName);  m->mothurRemove(outputGroupFileName); return 0;  }
317                 
318                 //output error messages about any remaining user labels
319                 set<string>::iterator it;
320                 bool needToRun = false;
321                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
322                         m->mothurOut("Your file does not include the label " + *it); 
323                         if (processedLabels.count(lastLabel) != 1) {
324                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
325                                 needToRun = true;
326                         }else {
327                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
328                         }
329                 }
330                 
331                 //run last label if you need to
332                 if (needToRun == true)  {
333                         if (list != NULL) { delete list; }
334                         
335                         list = input->getListVector(lastLabel);
336                         
337                         processList(list, groupMap, out, outGroup, wroteSomething);
338                         
339                         delete list; list = NULL;
340                 }
341                 
342                 out.close();
343                 outGroup.close();
344                 
345                 if (wroteSomething == false) {  m->mothurOut("At distance " + label + " your file ONLY contains otus containing sequences from the groups you wish to remove."); m->mothurOutEndLine();  }
346                 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
347                 outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName);
348                 
349                 return 0;
350                 
351         }
352         catch(exception& e) {
353                 m->errorOut(e, "RemoveOtusCommand", "readList");
354                 exit(1);
355         }
356 }
357 //**********************************************************************************************************************
358 int RemoveOtusCommand::processList(ListVector*& list, GroupMap*& groupMap, ofstream& out, ofstream& outGroup, bool& wroteSomething){
359         try {
360                 
361                 //make a new list vector
362                 ListVector newList;
363                 newList.setLabel(list->getLabel());
364                 
365                 int numOtus = 0;
366                 //for each bin
367                 for (int i = 0; i < list->getNumBins(); i++) {
368                         if (m->control_pressed) { return 0; }
369                         
370                         //parse out names that are in accnos file
371                         string binnames = list->get(i);
372                         
373                         bool removeBin = false;
374                         string groupFileOutput = "";
375                         
376                         //parse names
377                         string individual = "";
378                         int length = binnames.length();
379                         for(int j=0;j<length;j++){
380                                 if(binnames[j] == ','){
381                                         string group = groupMap->getGroup(individual);
382                                         if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
383                                         
384                                         if (m->inUsersGroups(group, Groups)) {  removeBin = true; break; }
385                                         groupFileOutput += individual + "\t" + group + "\n";
386                                         individual = "";        
387                                         
388                                 }
389                                 else{  individual += binnames[j];  }
390                         }
391                         
392                         if (!removeBin) { 
393                                 //get last name
394                                 string group = groupMap->getGroup(individual);
395                                 if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
396                                 
397                                 if (m->inUsersGroups(group, Groups)) {  removeBin = true; }
398                                 groupFileOutput += individual + "\t" + group + "\n";                            
399                                 
400                                 if (!removeBin) {
401                                         //if there are no sequences from the groups we want to remove in this bin add to new list, output to groupfile
402                                         newList.push_back(binnames);    
403                                         outGroup << groupFileOutput;
404                                 }else {
405                                         numOtus++;
406                                 }
407                         }else {
408                                 numOtus++;
409                         }
410                         
411                 }
412                 
413                 //print new listvector
414                 if (newList.getNumBins() != 0) {
415                         wroteSomething = true;
416                         newList.print(out);
417                 }
418                 
419                 m->mothurOut(newList.getLabel() + " - removed " + toString(numOtus) + " of the " + toString(list->getNumBins()) + " OTUs."); m->mothurOutEndLine();
420                 
421                 return 0;
422                 
423         }
424         catch(exception& e) {
425                 m->errorOut(e, "RemoveOtusCommand", "processList");
426                 exit(1);
427         }
428 }
429 //**********************************************************************************************************************
430
431
432