]> git.donarmstrong.com Git - mothur.git/blob - getotuscommand.cpp
working on current change
[mothur.git] / getotuscommand.cpp
1 /*
2  *  getotuscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/10/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "getotuscommand.h"
11 #include "inputdata.h"
12 #include "sharedutilities.h"
13
14
15 //**********************************************************************************************************************
16 vector<string> GetOtusCommand::setParameters(){ 
17         try {
18                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pgroup);
19                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
20                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
21                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25                 
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "GetOtusCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string GetOtusCommand::getHelpString(){ 
37         try {
38                 string helpString = "";
39                 helpString += "The get.otus command selects otus containing sequences from a specfic group or set of groups.\n";
40                 helpString += "It outputs a new list file containing the otus containing sequences from in the those specified groups.\n";
41                 helpString += "The get.otus command parameters are accnos, group, list, label and groups. The group and list parameters are required, unless you have valid current files.\n";
42                 helpString += "You must also provide an accnos containing the list of groups to get or set the groups parameter to the groups you wish to select.\n";
43                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like.  You can separate group names with dashes.\n";
44                 helpString += "The label parameter allows you to specify which distance you want to process.\n";
45                 helpString += "The get.otus command should be in the following format: get.otus(accnos=yourAccnos, list=yourListFile, group=yourGroupFile, label=yourLabel).\n";
46                 helpString += "Example get.otus(accnos=amazon.accnos, list=amazon.fn.list, group=amazon.groups, label=0.03).\n";
47                 helpString += "or get.otus(groups=pasture, list=amazon.fn.list, amazon.groups, label=0.03).\n";
48                 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n";
49                 return helpString;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "GetOtusCommand", "getHelpString");
53                 exit(1);
54         }
55 }
56
57 //**********************************************************************************************************************
58 GetOtusCommand::GetOtusCommand(){       
59         try {
60                 abort = true; calledHelp = true;
61                 setParameters();
62                 vector<string> tempOutNames;
63                 outputTypes["group"] = tempOutNames;
64                 outputTypes["list"] = tempOutNames;
65         }
66         catch(exception& e) {
67                 m->errorOut(e, "GetOtusCommand", "GetOtusCommand");
68                 exit(1);
69         }
70 }
71 //**********************************************************************************************************************
72 GetOtusCommand::GetOtusCommand(string option)  {
73         try {
74                 abort = false; calledHelp = false;   
75                 
76                 //allow user to run help
77                 if(option == "help") { help(); abort = true; calledHelp = true; }
78                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
79                 
80                 else {
81                         vector<string> myArray = setParameters();
82                         
83                         OptionParser parser(option);
84                         map<string,string> parameters = parser.getParameters();
85                         
86                         ValidParameters validParameter;
87                         map<string,string>::iterator it;
88                         
89                         //check to make sure all parameters are valid for command
90                         for (it = parameters.begin(); it != parameters.end(); it++) { 
91                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
92                         }
93                         
94                         //initialize outputTypes
95                         vector<string> tempOutNames;
96                         outputTypes["group"] = tempOutNames;
97                         outputTypes["list"] = tempOutNames;
98                         
99                         
100                         //if the user changes the output directory command factory will send this info to us in the output parameter 
101                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
102                         
103                         //if the user changes the input directory command factory will send this info to us in the output parameter 
104                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
105                         if (inputDir == "not found"){   inputDir = "";          }
106                         else {
107                                 string path;
108                                 it = parameters.find("accnos");
109                                 //user has given a template file
110                                 if(it != parameters.end()){ 
111                                         path = m->hasPath(it->second);
112                                         //if the user has not given a path then, add inputdir. else leave path alone.
113                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
114                                 }
115                                 
116                                 it = parameters.find("list");
117                                 //user has given a template file
118                                 if(it != parameters.end()){ 
119                                         path = m->hasPath(it->second);
120                                         //if the user has not given a path then, add inputdir. else leave path alone.
121                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
122                                 }
123                                 
124                                 it = parameters.find("group");
125                                 //user has given a template file
126                                 if(it != parameters.end()){ 
127                                         path = m->hasPath(it->second);
128                                         //if the user has not given a path then, add inputdir. else leave path alone.
129                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
130                                 }
131                         }
132                         
133                         
134                         //check for required parameters
135                         accnosfile = validParameter.validFile(parameters, "accnos", true);
136                         if (accnosfile == "not open") { abort = true; }
137                         else if (accnosfile == "not found") {  accnosfile = ""; }
138                         else { m->setAccnosFile(accnosfile); }
139                         
140                         groupfile = validParameter.validFile(parameters, "group", true);
141                         if (groupfile == "not open") { abort = true; }
142                         else if (groupfile == "not found") {                            
143                                 groupfile = m->getGroupFile(); 
144                                 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
145                                 else {  m->mothurOut("You have no current group file and the group parameter is required."); m->mothurOutEndLine(); abort = true; }
146                         }else { m->setGroupFile(groupfile); }   
147                         
148                         listfile = validParameter.validFile(parameters, "list", true);
149                         if (listfile == "not open") { abort = true; }
150                         else if (listfile == "not found") {                             
151                                 listfile = m->getListFile(); 
152                                 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
153                                 else {  m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
154                         }else { m->setListFile(listfile); }     
155                         
156                         groups = validParameter.validFile(parameters, "groups", false);                 
157                         if (groups == "not found") { groups = ""; }
158                         else { 
159                                 m->splitAtDash(groups, Groups);
160                         }
161                         
162                         label = validParameter.validFile(parameters, "label", false);                   
163                         if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }  
164                         
165                         if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; }
166                 }
167                 
168         }
169         catch(exception& e) {
170                 m->errorOut(e, "GetOtusCommand", "GetOtusCommand");
171                 exit(1);
172         }
173 }
174 //**********************************************************************************************************************
175
176 int GetOtusCommand::execute(){
177         try {
178                 
179                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
180                 
181                 groupMap = new GroupMap(groupfile);
182                 groupMap->readMap();
183                 
184                 //get groups you want to get
185                 if (accnosfile != "") { readAccnos(); }
186                 
187                 //make sure groups are valid
188                 //takes care of user setting groupNames that are invalid or setting groups=all
189                 SharedUtil* util = new SharedUtil();
190                 util->setGroups(Groups, groupMap->namesOfGroups);
191                 delete util;
192                 
193                 if (m->control_pressed) { delete groupMap; return 0; }
194                 
195                 //read through the list file keeping any otus that contain any sequence from the groups selected
196                 readListGroup();
197                 
198                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
199                                 
200                 if (outputNames.size() != 0) {
201                         m->mothurOutEndLine();
202                         m->mothurOut("Output File names: "); m->mothurOutEndLine();
203                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
204                         m->mothurOutEndLine();
205                         
206                         //set list file as new current listfile
207                         string current = "";
208                         itTypes = outputTypes.find("group");
209                         if (itTypes != outputTypes.end()) {
210                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
211                         }
212                         
213                         itTypes = outputTypes.find("list");
214                         if (itTypes != outputTypes.end()) {
215                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
216                         }
217                 }
218                 
219                 return 0;               
220         }
221         
222         catch(exception& e) {
223                 m->errorOut(e, "GetOtusCommand", "execute");
224                 exit(1);
225         }
226 }
227 //**********************************************************************************************************************
228 int GetOtusCommand::readListGroup(){
229         try {
230                 string thisOutputDir = outputDir;
231                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
232                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick." + label +  m->getExtension(listfile);
233                 
234                 ofstream out;
235                 m->openOutputFile(outputFileName, out);
236                 
237                 string GroupOutputDir = outputDir;
238                 if (outputDir == "") {  GroupOutputDir += m->hasPath(groupfile);  }
239                 string outputGroupFileName = GroupOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick." + label  + m->getExtension(groupfile);
240                 
241                 ofstream outGroup;
242                 m->openOutputFile(outputGroupFileName, outGroup);
243                         
244                 InputData* input = new InputData(listfile, "list");
245                 ListVector* list = input->getListVector();
246                 string lastLabel = list->getLabel();
247                 
248                 //using first label seen if none is provided
249                 if (label == "") { label = lastLabel; }
250                 
251                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
252                 set<string> labels; labels.insert(label);
253                 set<string> processedLabels;
254                 set<string> userLabels = labels;
255                 
256                 bool wroteSomething = false;
257
258                 //as long as you are not at the end of the file or done wih the lines you want
259                 while((list != NULL) && (userLabels.size() != 0)) {
260                         
261                         if (m->control_pressed) {  delete list; delete input; out.close();  outGroup.close(); remove(outputFileName.c_str());  remove(outputGroupFileName.c_str());return 0;  }
262                         
263                         if(labels.count(list->getLabel()) == 1){
264                                 processList(list, groupMap, out, outGroup, wroteSomething);
265                                 
266                                 processedLabels.insert(list->getLabel());
267                                 userLabels.erase(list->getLabel());
268                         }
269                         
270                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
271                                 string saveLabel = list->getLabel();
272                                 
273                                 delete list; 
274                                 
275                                 list = input->getListVector(lastLabel);
276                                 
277                                 processList(list, groupMap, out, outGroup, wroteSomething);
278                                 
279                                 processedLabels.insert(list->getLabel());
280                                 userLabels.erase(list->getLabel());
281                                 
282                                 //restore real lastlabel to save below
283                                 list->setLabel(saveLabel);
284                         }
285                         
286                         lastLabel = list->getLabel();
287                         
288                         delete list; list = NULL;
289                         
290                         //get next line to process
291                         list = input->getListVector();                          
292                 }
293                 
294                 
295                 if (m->control_pressed) {  if (list != NULL) { delete list; } delete input; out.close(); outGroup.close(); remove(outputFileName.c_str());  remove(outputGroupFileName.c_str()); return 0;  }
296                 
297                 //output error messages about any remaining user labels
298                 set<string>::iterator it;
299                 bool needToRun = false;
300                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
301                         m->mothurOut("Your file does not include the label " + *it); 
302                         if (processedLabels.count(lastLabel) != 1) {
303                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
304                                 needToRun = true;
305                         }else {
306                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
307                         }
308                 }
309                 
310                 //run last label if you need to
311                 if (needToRun == true)  {
312                         if (list != NULL) { delete list; }
313                         
314                         list = input->getListVector(lastLabel);
315                         
316                         processList(list, groupMap, out, outGroup, wroteSomething);
317                         
318                         delete list; list = NULL;
319                 }
320                                         
321                 out.close();
322                 outGroup.close();
323                 
324                 if (wroteSomething == false) {  m->mothurOut("At distance " + label + " your file does NOT contain any otus containing sequences from the groups you wish to get."); m->mothurOutEndLine();  }
325                 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
326                 outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName);
327                 
328                 return 0;
329                 
330         }
331         catch(exception& e) {
332                 m->errorOut(e, "GetOtusCommand", "readList");
333                 exit(1);
334         }
335 }
336 //**********************************************************************************************************************
337 int GetOtusCommand::processList(ListVector*& list, GroupMap*& groupMap, ofstream& out, ofstream& outGroup, bool& wroteSomething){
338         try {
339                 
340                 //make a new list vector
341                 ListVector newList;
342                 newList.setLabel(list->getLabel());
343                 
344                 int numOtus = 0;
345                 //for each bin
346                 for (int i = 0; i < list->getNumBins(); i++) {
347                         if (m->control_pressed) { return 0; }
348                         
349                         //parse out names that are in accnos file
350                         string binnames = list->get(i);
351                         
352                         bool keepBin = false;
353                         string groupFileOutput = "";
354                         
355                         //parse names
356                         string individual = "";
357                         int length = binnames.length();
358                         for(int j=0;j<length;j++){
359                                 if(binnames[j] == ','){
360                                         string group = groupMap->getGroup(individual);
361                                         if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
362                                         
363                                         if (m->inUsersGroups(group, Groups)) {  keepBin = true; }
364                                         groupFileOutput += individual + "\t" + group + "\n";
365                                         individual = "";        
366                                         
367                                 }
368                                 else{  individual += binnames[j];  }
369                         }
370                         
371                         string group = groupMap->getGroup(individual);
372                         if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
373                         
374                         if (m->inUsersGroups(group, Groups)) {  keepBin = true; }
375                         groupFileOutput += individual + "\t" + group + "\n";
376                         
377                         //if there are sequences from the groups we want in this bin add to new list, output to groupfile
378                         if (keepBin) {  
379                                 newList.push_back(binnames);    
380                                 outGroup << groupFileOutput;
381                                 numOtus++;
382                         }
383                 }
384                 
385                 //print new listvector
386                 if (newList.getNumBins() != 0) {
387                         wroteSomething = true;
388                         newList.print(out);
389                 }
390                 
391                 m->mothurOut(newList.getLabel() + " - selected " + toString(numOtus) + " of the " + toString(list->getNumBins()) + " OTUs."); m->mothurOutEndLine();
392         
393                 return 0;
394         }
395         catch(exception& e) {
396                 m->errorOut(e, "GetOtusCommand", "processList");
397                 exit(1);
398         }
399 }
400 //**********************************************************************************************************************
401 void GetOtusCommand::readAccnos(){
402         try {
403                 Groups.clear();
404                 
405                 ifstream in;
406                 m->openInputFile(accnosfile, in);
407                 string name;
408                 
409                 while(!in.eof()){
410                         in >> name;
411                         
412                         Groups.push_back(name);
413                         
414                         m->gobble(in);
415                 }
416                 in.close();             
417                 
418         }
419         catch(exception& e) {
420                 m->errorOut(e, "GetOtusCommand", "readAccnos");
421                 exit(1);
422         }
423 }
424 //**********************************************************************************************************************
425
426