]> git.donarmstrong.com Git - mothur.git/blob - getotuscommand.cpp
working on pam
[mothur.git] / getotuscommand.cpp
1 /*
2  *  getotuscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/10/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "getotuscommand.h"
11 #include "inputdata.h"
12 #include "sharedutilities.h"
13
14
15 //**********************************************************************************************************************
16 vector<string> GetOtusCommand::setParameters(){ 
17         try {
18                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none","group",false,true, true); parameters.push_back(pgroup);
19                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","list",false,true, true); parameters.push_back(plist);
20                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);
21                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
22                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
25                 
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "GetOtusCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string GetOtusCommand::getHelpString(){ 
37         try {
38                 string helpString = "";
39                 helpString += "The get.otus command selects otus containing sequences from a specfic group or set of groups.\n";
40                 helpString += "It outputs a new list file containing the otus containing sequences from in the those specified groups.\n";
41                 helpString += "The get.otus command parameters are accnos, group, list, label and groups. The group and list parameters are required, unless you have valid current files.\n";
42                 helpString += "You must also provide an accnos containing the list of groups to get or set the groups parameter to the groups you wish to select.\n";
43                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like.  You can separate group names with dashes.\n";
44                 helpString += "The label parameter allows you to specify which distance you want to process.\n";
45                 helpString += "The get.otus command should be in the following format: get.otus(accnos=yourAccnos, list=yourListFile, group=yourGroupFile, label=yourLabel).\n";
46                 helpString += "Example get.otus(accnos=amazon.accnos, list=amazon.fn.list, group=amazon.groups, label=0.03).\n";
47                 helpString += "or get.otus(groups=pasture, list=amazon.fn.list, amazon.groups, label=0.03).\n";
48                 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n";
49                 return helpString;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "GetOtusCommand", "getHelpString");
53                 exit(1);
54         }
55 }
56 //**********************************************************************************************************************
57 string GetOtusCommand::getOutputPattern(string type) {
58     try {
59         string pattern = "";
60         
61         if (type == "group")       {   pattern = "[filename],[tag],pick,[extension]";    }
62         else if (type == "list")        {   pattern = "[filename],[tag],pick,[extension]";    }
63         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
64         
65         return pattern;
66     }
67     catch(exception& e) {
68         m->errorOut(e, "GetOtusCommand", "getOutputPattern");
69         exit(1);
70     }
71 }
72 //**********************************************************************************************************************
73 GetOtusCommand::GetOtusCommand(){       
74         try {
75                 abort = true; calledHelp = true;
76                 setParameters();
77                 vector<string> tempOutNames;
78                 outputTypes["group"] = tempOutNames;
79                 outputTypes["list"] = tempOutNames;
80         }
81         catch(exception& e) {
82                 m->errorOut(e, "GetOtusCommand", "GetOtusCommand");
83                 exit(1);
84         }
85 }
86 //**********************************************************************************************************************
87 GetOtusCommand::GetOtusCommand(string option)  {
88         try {
89                 abort = false; calledHelp = false;   
90                 
91                 //allow user to run help
92                 if(option == "help") { help(); abort = true; calledHelp = true; }
93                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
94                 
95                 else {
96                         vector<string> myArray = setParameters();
97                         
98                         OptionParser parser(option);
99                         map<string,string> parameters = parser.getParameters();
100                         
101                         ValidParameters validParameter;
102                         map<string,string>::iterator it;
103                         
104                         //check to make sure all parameters are valid for command
105                         for (it = parameters.begin(); it != parameters.end(); it++) { 
106                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
107                         }
108                         
109                         //initialize outputTypes
110                         vector<string> tempOutNames;
111                         outputTypes["group"] = tempOutNames;
112                         outputTypes["list"] = tempOutNames;
113                         
114                         
115                         //if the user changes the output directory command factory will send this info to us in the output parameter 
116                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
117                         
118                         //if the user changes the input directory command factory will send this info to us in the output parameter 
119                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
120                         if (inputDir == "not found"){   inputDir = "";          }
121                         else {
122                                 string path;
123                                 it = parameters.find("accnos");
124                                 //user has given a template file
125                                 if(it != parameters.end()){ 
126                                         path = m->hasPath(it->second);
127                                         //if the user has not given a path then, add inputdir. else leave path alone.
128                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
129                                 }
130                                 
131                                 it = parameters.find("list");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
137                                 }
138                                 
139                                 it = parameters.find("group");
140                                 //user has given a template file
141                                 if(it != parameters.end()){ 
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
145                                 }
146                         }
147                         
148                         
149                         //check for required parameters
150                         accnosfile = validParameter.validFile(parameters, "accnos", true);
151                         if (accnosfile == "not open") { abort = true; }
152                         else if (accnosfile == "not found") {  accnosfile = ""; }
153                         else { m->setAccnosFile(accnosfile); }
154                         
155                         groupfile = validParameter.validFile(parameters, "group", true);
156                         if (groupfile == "not open") { abort = true; }
157                         else if (groupfile == "not found") {                            
158                                 groupfile = m->getGroupFile(); 
159                                 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
160                                 else {  m->mothurOut("You have no current group file and the group parameter is required."); m->mothurOutEndLine(); abort = true; }
161                         }else { m->setGroupFile(groupfile); }   
162                         
163                         listfile = validParameter.validFile(parameters, "list", true);
164                         if (listfile == "not open") { abort = true; }
165                         else if (listfile == "not found") {                             
166                                 listfile = m->getListFile(); 
167                                 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
168                                 else {  m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
169                         }else { m->setListFile(listfile); }     
170                         
171                         groups = validParameter.validFile(parameters, "groups", false);                 
172                         if (groups == "not found") { groups = ""; }
173                         else { 
174                                 m->splitAtDash(groups, Groups);
175                         }
176                         
177                         label = validParameter.validFile(parameters, "label", false);                   
178                         if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }  
179                         
180                         if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; }
181                 }
182                 
183         }
184         catch(exception& e) {
185                 m->errorOut(e, "GetOtusCommand", "GetOtusCommand");
186                 exit(1);
187         }
188 }
189 //**********************************************************************************************************************
190
191 int GetOtusCommand::execute(){
192         try {
193                 
194                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
195                 
196                 groupMap = new GroupMap(groupfile);
197                 groupMap->readMap();
198                 
199                 //get groups you want to get
200                 if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups); }
201                 
202                 //make sure groups are valid
203                 //takes care of user setting groupNames that are invalid or setting groups=all
204                 SharedUtil* util = new SharedUtil();
205                 vector<string> gNamesOfGroups = groupMap->getNamesOfGroups();
206                 util->setGroups(Groups, gNamesOfGroups);
207                 groupMap->setNamesOfGroups(gNamesOfGroups);
208                 delete util;
209                 
210                 if (m->control_pressed) { delete groupMap; return 0; }
211                 
212                 //read through the list file keeping any otus that contain any sequence from the groups selected
213                 readListGroup();
214                 
215                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
216                                 
217                 if (outputNames.size() != 0) {
218                         m->mothurOutEndLine();
219                         m->mothurOut("Output File names: "); m->mothurOutEndLine();
220                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
221                         m->mothurOutEndLine();
222                         
223                         //set list file as new current listfile
224                         string current = "";
225                         itTypes = outputTypes.find("group");
226                         if (itTypes != outputTypes.end()) {
227                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
228                         }
229                         
230                         itTypes = outputTypes.find("list");
231                         if (itTypes != outputTypes.end()) {
232                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
233                         }
234                 }
235                 
236                 return 0;               
237         }
238         
239         catch(exception& e) {
240                 m->errorOut(e, "GetOtusCommand", "execute");
241                 exit(1);
242         }
243 }
244 //**********************************************************************************************************************
245 int GetOtusCommand::readListGroup(){
246         try {
247                 InputData* input = new InputData(listfile, "list");
248                 ListVector* list = input->getListVector();
249                 string lastLabel = list->getLabel();
250                 
251                 //using first label seen if none is provided
252                 if (label == "") { label = lastLabel; }
253         
254         string thisOutputDir = outputDir;
255                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
256         map<string, string> variables;
257         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
258         variables["[tag]"] = label;
259         variables["[extension]"] = m->getExtension(listfile);
260                 string outputFileName = getOutputFileName("list", variables);
261                 
262                 ofstream out;
263                 m->openOutputFile(outputFileName, out);
264         
265         string GroupOutputDir = outputDir;
266                 if (outputDir == "") {  GroupOutputDir += m->hasPath(groupfile);  }
267         variables["[filename]"] = GroupOutputDir + m->getRootName(m->getSimpleName(groupfile));
268         variables["[extension]"] = m->getExtension(groupfile);
269                 string outputGroupFileName = getOutputFileName("group", variables);
270                 
271                 ofstream outGroup;
272                 m->openOutputFile(outputGroupFileName, outGroup);
273
274                 
275                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
276                 set<string> labels; labels.insert(label);
277                 set<string> processedLabels;
278                 set<string> userLabels = labels;
279                 
280                 bool wroteSomething = false;
281
282                 //as long as you are not at the end of the file or done wih the lines you want
283                 while((list != NULL) && (userLabels.size() != 0)) {
284                         
285                         if (m->control_pressed) {  delete list; delete input; out.close();  outGroup.close(); m->mothurRemove(outputFileName);  m->mothurRemove(outputGroupFileName);return 0;  }
286                         
287                         if(labels.count(list->getLabel()) == 1){
288                                 processList(list, groupMap, out, outGroup, wroteSomething);
289                                 
290                                 processedLabels.insert(list->getLabel());
291                                 userLabels.erase(list->getLabel());
292                         }
293                         
294                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
295                                 string saveLabel = list->getLabel();
296                                 
297                                 delete list; 
298                                 
299                                 list = input->getListVector(lastLabel);
300                                 
301                                 processList(list, groupMap, out, outGroup, wroteSomething);
302                                 
303                                 processedLabels.insert(list->getLabel());
304                                 userLabels.erase(list->getLabel());
305                                 
306                                 //restore real lastlabel to save below
307                                 list->setLabel(saveLabel);
308                         }
309                         
310                         lastLabel = list->getLabel();
311                         
312                         delete list; list = NULL;
313                         
314                         //get next line to process
315                         list = input->getListVector();                          
316                 }
317                 
318                 
319                 if (m->control_pressed) {  if (list != NULL) { delete list; } delete input; out.close(); outGroup.close(); m->mothurRemove(outputFileName);  m->mothurRemove(outputGroupFileName); return 0;  }
320                 
321                 //output error messages about any remaining user labels
322                 set<string>::iterator it;
323                 bool needToRun = false;
324                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
325                         m->mothurOut("Your file does not include the label " + *it); 
326                         if (processedLabels.count(lastLabel) != 1) {
327                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
328                                 needToRun = true;
329                         }else {
330                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
331                         }
332                 }
333                 
334                 //run last label if you need to
335                 if (needToRun == true)  {
336                         if (list != NULL) { delete list; }
337                         
338                         list = input->getListVector(lastLabel);
339                         
340                         processList(list, groupMap, out, outGroup, wroteSomething);
341                         
342                         delete list; list = NULL;
343                 }
344                                         
345                 out.close();
346                 outGroup.close();
347                 
348                 if (wroteSomething == false) {  m->mothurOut("At distance " + label + " your file does NOT contain any otus containing sequences from the groups you wish to get."); m->mothurOutEndLine();  }
349                 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
350                 outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName);
351                 
352                 return 0;
353                 
354         }
355         catch(exception& e) {
356                 m->errorOut(e, "GetOtusCommand", "readList");
357                 exit(1);
358         }
359 }
360 //**********************************************************************************************************************
361 int GetOtusCommand::processList(ListVector*& list, GroupMap*& groupMap, ofstream& out, ofstream& outGroup, bool& wroteSomething){
362         try {
363                 
364                 //make a new list vector
365                 ListVector newList;
366                 newList.setLabel(list->getLabel());
367                 
368                 int numOtus = 0;
369                 //for each bin
370         vector<string> binLabels = list->getLabels();
371         vector<string> newBinLabels;
372                 for (int i = 0; i < list->getNumBins(); i++) {
373                         if (m->control_pressed) { return 0; }
374                         
375                         //parse out names that are in accnos file
376                         string binnames = list->get(i);
377                         
378                         bool keepBin = false;
379                         string groupFileOutput = "";
380                         
381                         //parse names
382                         string individual = "";
383                         int length = binnames.length();
384                         for(int j=0;j<length;j++){
385                                 if(binnames[j] == ','){
386                                         string group = groupMap->getGroup(individual);
387                                         if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
388                                         
389                                         if (m->inUsersGroups(group, Groups)) {  keepBin = true; }
390                                         groupFileOutput += individual + "\t" + group + "\n";
391                                         individual = "";        
392                                         
393                                 }
394                                 else{  individual += binnames[j];  }
395                         }
396                         
397                         string group = groupMap->getGroup(individual);
398                         if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
399                         
400                         if (m->inUsersGroups(group, Groups)) {  keepBin = true; }
401                         groupFileOutput += individual + "\t" + group + "\n";
402                         
403                         //if there are sequences from the groups we want in this bin add to new list, output to groupfile
404                         if (keepBin) {  
405                                 newList.push_back(binnames);
406                 newBinLabels.push_back(binLabels[i]);
407                                 outGroup << groupFileOutput;
408                                 numOtus++;
409                         }
410                 }
411                 
412                 //print new listvector
413                 if (newList.getNumBins() != 0) {
414                         wroteSomething = true;
415                         newList.setLabels(newBinLabels);
416             newList.printHeaders(out);
417             newList.print(out);
418                 }
419                 
420                 m->mothurOut(newList.getLabel() + " - selected " + toString(numOtus) + " of the " + toString(list->getNumBins()) + " OTUs."); m->mothurOutEndLine();
421         
422                 return 0;
423         }
424         catch(exception& e) {
425                 m->errorOut(e, "GetOtusCommand", "processList");
426                 exit(1);
427         }
428 }
429 //**********************************************************************************************************************
430
431