]> git.donarmstrong.com Git - mothur.git/blob - otuhierarchycommand.cpp
fixes while testing 1.33.0
[mothur.git] / otuhierarchycommand.cpp
1 /*
2  *  otuhierarchycommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 1/19/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "otuhierarchycommand.h"
11 #include "inputdata.h"
12
13 //**********************************************************************************************************************
14 vector<string> OtuHierarchyCommand::setParameters(){    
15         try {
16                 CommandParameter poutput("output", "Multiple", "name-number", "name", "", "", "","",false,false); parameters.push_back(poutput);
17                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","otuheirarchy",false,true,true); parameters.push_back(plist);
18                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
19                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
20                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
21                 
22                 vector<string> myArray;
23                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
24                 return myArray;
25         }
26         catch(exception& e) {
27                 m->errorOut(e, "OtuHierarchyCommand", "setParameters");
28                 exit(1);
29         }
30 }
31 //**********************************************************************************************************************
32 string OtuHierarchyCommand::getHelpString(){    
33         try {
34                 string helpString = "";
35                 helpString += "The otu.hierarchy command is used to see how otus relate at two distances. \n";
36                 helpString += "The otu.hierarchy command parameters are list, label and output.  list and label parameters are required. \n";
37                 helpString += "The output parameter allows you to output the names of the sequence in the OTUs or the OTU numbers. Options are name and number, default is name. \n";
38                 helpString += "The otu.hierarchy command should be in the following format: \n";
39                 helpString += "otu.hierarchy(list=yourListFile, label=yourLabels).\n";
40                 helpString += "Example otu.hierarchy(list=amazon.fn.list, label=0.01-0.03).\n";
41                 helpString += "The otu.hierarchy command outputs a .otu.hierarchy file which is described on the wiki.\n";
42                 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n";
43                 return helpString;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "OtuHierarchyCommand", "getHelpString");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 string OtuHierarchyCommand::getOutputPattern(string type) {
52     try {
53         string pattern = "";
54         
55         if (type == "otuheirarchy") {  pattern = "[filename],[distance1],[tag],[distance2],otu.hierarchy"; } 
56         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
57         
58         return pattern;
59     }
60     catch(exception& e) {
61         m->errorOut(e, "OtuHierarchyCommand", "getOutputPattern");
62         exit(1);
63     }
64 }
65 //**********************************************************************************************************************
66 OtuHierarchyCommand::OtuHierarchyCommand(){     
67         try {
68                 abort = true; calledHelp = true; 
69                 setParameters();
70                 vector<string> tempOutNames;
71                 outputTypes["otuheirarchy"] = tempOutNames;
72         }
73         catch(exception& e) {
74                 m->errorOut(e, "OtuHierarchyCommand", "OtuHierarchyCommand");
75                 exit(1);
76         }
77 }
78 //**********************************************************************************************************************
79 OtuHierarchyCommand::OtuHierarchyCommand(string option) {
80         try {
81                 abort = false; calledHelp = false;   
82                 
83                 //allow user to run help
84                 if(option == "help") {  help(); abort = true; calledHelp = true; }
85                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
86                 
87                 else {
88                         vector<string> myArray = setParameters();
89                         
90                         OptionParser parser(option);
91                         map<string,string> parameters = parser.getParameters();
92                         
93                         ValidParameters validParameter;
94                         map<string,string>::iterator it;
95                 
96                         //check to make sure all parameters are valid for command
97                         for (it = parameters.begin(); it != parameters.end(); it++) { 
98                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
99                         }
100                         
101                         //initialize outputTypes
102                         vector<string> tempOutNames;
103                         outputTypes["otuheirarchy"] = tempOutNames;
104                         
105                         //if the user changes the input directory command factory will send this info to us in the output parameter 
106                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
107                         if (inputDir == "not found"){   inputDir = "";          }
108                         else {
109                                 string path;
110                                 it = parameters.find("list");
111                                 //user has given a template file
112                                 if(it != parameters.end()){ 
113                                         path = m->hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
116                                 }
117                         }
118
119                         listFile = validParameter.validFile(parameters, "list", true);
120                         if (listFile == "not found") { 
121                                 listFile = m->getListFile(); 
122                                 if (listFile != "") {  m->mothurOut("Using " + listFile + " as input file for the list parameter."); m->mothurOutEndLine(); }
123                                 else { 
124                                         m->mothurOut("No valid current list file. You must provide a list file."); m->mothurOutEndLine(); 
125                                         abort = true;
126                                 }
127                         }else if (listFile == "not open") { abort = true; }     
128                         else { m->setListFile(listFile); }
129                         
130                         //if the user changes the output directory command factory will send this info to us in the output parameter 
131                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
132                                 outputDir = ""; 
133                                 outputDir += m->hasPath(listFile); //if user entered a file with a path then preserve it        
134                         }
135                         
136                         //check for optional parameter and set defaults
137                         // ...at some point should added some additional type checking...
138                         label = validParameter.validFile(parameters, "label", false);                   
139                         if (label == "not found") { m->mothurOut("label is a required parameter for the otu.hierarchy command."); m->mothurOutEndLine(); abort = true; }
140                         else { 
141                                 m->splitAtDash(label, mylabels);
142                                 if (mylabels.size() != 2) { m->mothurOut("You must provide 2 labels."); m->mothurOutEndLine(); abort = true; }
143                         }       
144                         
145                         output = validParameter.validFile(parameters, "output", false);                 if (output == "not found") { output = "name"; }
146                         
147                         if ((output != "name") && (output != "number")) { m->mothurOut("output options are name and number. I will use name."); m->mothurOutEndLine(); output = "name"; }
148                 }
149                 
150         }
151         catch(exception& e) {
152                 m->errorOut(e, "OtuHierarchyCommand", "OtuHierarchyCommand");
153                 exit(1);
154         }                       
155 }
156 //**********************************************************************************************************************
157
158 int OtuHierarchyCommand::execute(){
159         try {
160                 
161                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
162                 
163                 //get listvectors that correspond to labels requested, (or use smart distancing to get closest listvector)
164                 vector< vector<string> > lists = getListVectors();
165                 
166                 if (m->control_pressed) { outputTypes.clear(); return 0; }
167                 
168                 //determine which is little and which is big, putting little first
169                 if (lists.size() == 4) {
170                         //if big is first swap them
171                         if (lists[0].size() < lists[2].size()) {
172                                 vector< vector<string> > tempLists;
173                 tempLists.push_back(lists[2]);
174                 tempLists.push_back(lists[3]);
175                 tempLists.push_back(lists[0]);
176                 tempLists.push_back(lists[1]);
177                 lists = tempLists;
178                 string tempLabel = list2Label;
179                 list2Label = list1Label;
180                 list1Label = tempLabel;
181                         }
182                 }else{
183                         m->mothurOut("error getting listvectors, unable to read 2 different vectors, check your label inputs."); m->mothurOutEndLine(); return 0;
184                 }
185                 
186                 //map sequences to bin number in the "little" otu
187                 map<string, int> littleBins;
188         vector<string> binLabels0 = lists[0];
189                 for (int i = 0; i < lists[0].size(); i++) {
190                 
191                         if (m->control_pressed) {  return 0; }
192                         string bin = lists[1][i];
193             vector<string> names; m->splitAtComma(bin, names);
194                         for (int j = 0; j < names.size(); j++) { littleBins[names[j]] = i; }
195         }
196                 
197                 ofstream out;
198         map<string, string> variables; 
199         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listFile));
200         variables["[distance1]"] = list1Label;
201         variables["[tag]"] = "-"; 
202         variables["[distance2]"] = list2Label;
203                 string outputFileName = getOutputFileName("otuheirarchy",variables);
204                 m->openOutputFile(outputFileName, out);
205                 
206                 //go through each bin in "big" otu and output the bins in "little" otu which created it
207         vector<string> binLabels1 = lists[2];
208                 for (int i = 0; i < lists[2].size(); i++) {
209                 
210                         if (m->control_pressed) { outputTypes.clear(); out.close(); m->mothurRemove(outputFileName); return 0; }
211                         
212                         string binnames = lists[3][i];
213             vector<string> names; m->splitAtComma(binnames, names);
214                         
215                         //output column 1
216                         if (output == "name")   {   out << binnames << '\t';    }
217                         else                                    {       out << binLabels1[i] << '\t';           }
218                         
219                         map<int, int> bins; //bin numbers in little that are in this bin in big
220                         map<int, int>::iterator it;
221                         
222                         //parse bin
223                         for (int j = 0; j < names.size(); j++) { bins[littleBins[names[j]]] = littleBins[names[j]];   }
224                         
225                         string col2 = "";
226                         for (it = bins.begin(); it != bins.end(); it++) {
227                                 if (output == "name")   {   col2 += lists[1][it->first] + "\t"; }
228                                 else                                    {       col2 += binLabels0[it->first] + "\t";           }
229                         }
230                         
231                         //output column 2
232                         out << col2 << endl;
233                 }
234                 
235                 out.close();
236                 
237                 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); return 0; }
238                 
239                 m->mothurOutEndLine();
240                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
241                 m->mothurOut(outputFileName); m->mothurOutEndLine();    outputNames.push_back(outputFileName); outputTypes["otuheirarchy"].push_back(outputFileName); 
242                 m->mothurOutEndLine();
243                 
244                 return 0;
245         }
246         catch(exception& e) {
247                 m->errorOut(e, "OtuHierarchyCommand", "execute");
248                 exit(1);
249         }
250 }
251
252 //**********************************************************************************************************************
253 //returns a vector of listVectors where "little" vector is first
254 vector< vector<string> > OtuHierarchyCommand::getListVectors() { //return value [0] -> otulabelsFirstLabel [1] -> binsFirstLabel [2] -> otulabelsSecondLabel [3] -> binsSecondLabel
255         try {
256                 vector< vector<string> > lists;
257         
258         int count = 0;
259         for (set<string>::iterator it = mylabels.begin(); it != mylabels.end(); it++) {
260             string realLabel;
261             vector< vector<string> > thisList = getListVector(*it, realLabel);
262             
263             if (m->control_pressed) {  return lists; }
264             
265             for (int i = 0; i < thisList.size(); i++) { lists.push_back(thisList[i]); }
266             
267             if (count == 0) {  list1Label = realLabel; count++; }
268             else {  list2Label = realLabel; }
269         }
270         
271         return lists;
272         }
273         catch(exception& e) {
274                 m->errorOut(e, "OtuHierarchyCommand", "getListVectors");
275                 exit(1);
276         }
277 }
278 //**********************************************************************************************************************
279 vector< vector<string> > OtuHierarchyCommand::getListVector(string label, string& realLabel){ //return value [0] -> otulabels [1] -> bins
280         try {
281         vector< vector<string> > myList;
282         
283                 InputData input(listFile, "list");
284                 ListVector* list = input.getListVector();
285                 string lastLabel = list->getLabel();
286                 
287                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
288                 set<string> labels; labels.insert(label);
289                 set<string> processedLabels;
290                 set<string> userLabels = labels;
291                 
292                 //as long as you are not at the end of the file or done wih the lines you want
293                 while((list != NULL) && (userLabels.size() != 0)) {
294                         if (m->control_pressed) {  return myList;  }
295                         
296                         if(labels.count(list->getLabel()) == 1){
297                                 processedLabels.insert(list->getLabel());
298                                 userLabels.erase(list->getLabel());
299                                 break;
300                         }
301                         
302                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
303                                 string saveLabel = list->getLabel();
304                                 
305                                 delete list;
306                                 list = input.getListVector(lastLabel);
307                                 
308                                 processedLabels.insert(list->getLabel());
309                                 userLabels.erase(list->getLabel());
310                                 
311                                 //restore real lastlabel to save below
312                                 //list->setLabel(saveLabel);
313                                 break;
314                         }
315                         
316                         lastLabel = list->getLabel();
317                         
318                         //get next line to process
319                         //prevent memory leak
320                         delete list;
321                         list = input.getListVector();
322                 }
323                 
324                 
325                 if (m->control_pressed) {  return myList;  }
326                 
327                 //output error messages about any remaining user labels
328                 set<string>::iterator it;
329                 bool needToRun = false;
330                 for (it = userLabels.begin(); it != userLabels.end(); it++) {
331                         m->mothurOut("Your file does not include the label " + *it);
332                         if (processedLabels.count(lastLabel) != 1) {
333                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
334                                 needToRun = true;
335                         }else {
336                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
337                         }
338                 }
339                 
340                 //run last label if you need to
341                 if (needToRun == true)  {
342                         delete list;
343                         list = input.getListVector(lastLabel);
344                 }
345                 
346         //at this point the list vector has the right distance
347         myList.push_back(list->getLabels());
348         vector<string> bins;
349         for (int i = 0; i < list->getNumBins(); i++) {
350             if (m->control_pressed) {  return myList;  }
351             bins.push_back(list->get(i));
352         }
353         myList.push_back(bins);
354         realLabel = list->getLabel();
355         
356         delete list;
357         
358                 return myList;
359         }
360         catch(exception& e) {
361                 m->errorOut(e, "OtuHierarchyCommand", "getListVector");
362                 exit(1);
363         }
364 }
365
366 //**********************************************************************************************************************
367
368
369
370
371