]> git.donarmstrong.com Git - mothur.git/blob - classifyotucommand.cpp
changed added group output to indicator command. a few changes to work with the guy
[mothur.git] / classifyotucommand.cpp
1 /*
2  *  classifyotucommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 6/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "classifyotucommand.h"
11 #include "phylotree.h"
12 #include "phylosummary.h"
13 #include "sharedutilities.h"
14
15 //**********************************************************************************************************************
16 vector<string> ClassifyOtuCommand::setParameters(){     
17         try {
18                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
19                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy);
20                 CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(preftaxonomy);
21         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
22         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
23                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
24         CommandParameter ppersample("persample", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ppersample);
25         CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
26                 CommandParameter pbasis("basis", "Multiple", "otu-sequence", "otu", "", "", "",false,false); parameters.push_back(pbasis);
27                 CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "",false,true); parameters.push_back(pcutoff);
28                 CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pprobs);
29                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
31                 
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "ClassifyOtuCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string ClassifyOtuCommand::getHelpString(){     
43         try {
44                 string helpString = "";
45                 helpString += "The classify.otu command parameters are list, taxonomy, reftaxonomy, name, group, count, cutoff, label, basis and probs.  The taxonomy and list parameters are required unless you have a valid current file.\n";
46                 helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. Providing it will keep the rankIDs in the summary file static.\n";
47                 helpString += "The name parameter allows you add a names file with your taxonomy file.\n";
48                 helpString += "The group parameter allows you provide a group file to use in creating the summary file breakdown.\n";
49                 helpString += "The count parameter allows you add a count file associated with your list file. When using the count parameter mothur assumes your list file contains only uniques.\n";
50         helpString += "The basis parameter allows you indicate what you want the summary file to represent, options are otu and sequence. Default is otu.\n";
51                 helpString += "For example consider the following basis=sequence could give Clostridiales       3       105     16      43      46, where 105 is the total number of sequences whose otu classified to Clostridiales.\n";
52                 helpString += "16 is the number of sequences in the otus from groupA, 43 is the number of sequences in the otus from groupB, and 46 is the number of sequences in the otus from groupC.\n";
53                 helpString += "Now for basis=otu could give Clostridiales       3       7       6       1       2, where 7 is the number of otus that classified to Clostridiales.\n";
54                 helpString += "6 is the number of otus containing sequences from groupA, 1 is the number of otus containing sequences from groupB, and 2 is the number of otus containing sequences from groupC.\n";
55                 helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n";
56                 helpString += "The default value for label is all labels in your inputfile.\n";
57                 helpString += "The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy.  The default is 51, meaning 51%. Cutoff cannot be below 51.\n";
58                 helpString += "The probs parameter shuts off the outputting of the consensus confidence results. The default is true, meaning you want the confidence to be shown.\n";
59                 helpString += "The classify.otu command should be in the following format: classify.otu(taxonomy=yourTaxonomyFile, list=yourListFile, name=yourNamesFile, label=yourLabels).\n";
60                 helpString += "Example classify.otu(taxonomy=abrecovery.silva.full.taxonomy, list=abrecovery.fn.list, label=0.10).\n";
61                 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n";
62                 return helpString;
63         }
64         catch(exception& e) {
65                 m->errorOut(e, "ClassifyOtuCommand", "getHelpString");
66                 exit(1);
67         }
68 }
69 //**********************************************************************************************************************
70 string ClassifyOtuCommand::getOutputFileNameTag(string type, string inputName=""){      
71         try {
72         string outputFileName = "";
73                 map<string, vector<string> >::iterator it;
74         
75         //is this a type this command creates
76         it = outputTypes.find(type);
77         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
78         else {
79             if (type == "constaxonomy") {  outputFileName =  "cons.taxonomy"; }
80             else if (type == "taxsummary") {  outputFileName =  "cons.tax.summary"; }
81             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
82         }
83         return outputFileName;
84         }
85         catch(exception& e) {
86                 m->errorOut(e, "ClassifyOtuCommand", "getOutputFileNameTag");
87                 exit(1);
88         }
89 }
90 //**********************************************************************************************************************
91 ClassifyOtuCommand::ClassifyOtuCommand(){       
92         try {
93                 abort = true; calledHelp = true; 
94                 setParameters();
95                 vector<string> tempOutNames;
96                 outputTypes["constaxonomy"] = tempOutNames;
97                 outputTypes["taxsummary"] = tempOutNames;
98         }
99         catch(exception& e) {
100                 m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
101                 exit(1);
102         }
103 }
104
105 //**********************************************************************************************************************
106 ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
107         try{
108                 abort = false; calledHelp = false;   
109                 allLines = 1;
110                 labels.clear();
111                                 
112                 //allow user to run help
113                 if (option == "help") { 
114                         help(); abort = true; calledHelp = true;
115                 }else if(option == "citation") { citation(); abort = true; calledHelp = true;} 
116                 else {
117                         vector<string> myArray = setParameters();
118                         
119                         OptionParser parser(option);
120                         map<string, string> parameters = parser.getParameters();
121                         
122                         ValidParameters validParameter;
123                         map<string, string>::iterator it;
124                 
125                         //check to make sure all parameters are valid for command
126                         for (it = parameters.begin(); it != parameters.end(); it++) { 
127                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
128                         }
129                         
130                         //initialize outputTypes
131                         vector<string> tempOutNames;
132                         outputTypes["constaxonomy"] = tempOutNames;
133                         outputTypes["taxsummary"] = tempOutNames;
134                 
135                         //if the user changes the input directory command factory will send this info to us in the output parameter 
136                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
137                         if (inputDir == "not found"){   inputDir = "";          }
138                         else {
139                                 string path;
140                                 it = parameters.find("list");
141                                 //user has given a template file
142                                 if(it != parameters.end()){ 
143                                         path = m->hasPath(it->second);
144                                         //if the user has not given a path then, add inputdir. else leave path alone.
145                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
146                                 }
147                                 
148                                 it = parameters.find("name");
149                                 //user has given a template file
150                                 if(it != parameters.end()){ 
151                                         path = m->hasPath(it->second);
152                                         //if the user has not given a path then, add inputdir. else leave path alone.
153                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
154                                 }
155                                 
156                                 it = parameters.find("taxonomy");
157                                 //user has given a template file
158                                 if(it != parameters.end()){ 
159                                         path = m->hasPath(it->second);
160                                         //if the user has not given a path then, add inputdir. else leave path alone.
161                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
162                                 }
163                                 
164                                 it = parameters.find("reftaxonomy");
165                                 //user has given a template file
166                                 if(it != parameters.end()){ 
167                                         path = m->hasPath(it->second);
168                                         //if the user has not given a path then, add inputdir. else leave path alone.
169                                         if (path == "") {       parameters["reftaxonomy"] = inputDir + it->second;              }
170                                 }
171                                 
172                                 it = parameters.find("group");
173                                 //user has given a template file
174                                 if(it != parameters.end()){ 
175                                         path = m->hasPath(it->second);
176                                         //if the user has not given a path then, add inputdir. else leave path alone.
177                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
178                                 }
179                 
180                 it = parameters.find("count");
181                                 //user has given a template file
182                                 if(it != parameters.end()){ 
183                                         path = m->hasPath(it->second);
184                                         //if the user has not given a path then, add inputdir. else leave path alone.
185                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
186                                 }
187                         }
188
189                         
190                         //if the user changes the output directory command factory will send this info to us in the output parameter 
191                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
192                         
193                         //check for required parameters
194                         listfile = validParameter.validFile(parameters, "list", true);
195                         if (listfile == "not found") {                          
196                                 //if there is a current list file, use it
197                                 listfile = m->getListFile(); 
198                                 if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
199                                 else {  m->mothurOut("You have no current listfile and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
200                         }
201                         else if (listfile == "not open") { abort = true; }      
202                         else { m->setListFile(listfile); }
203                         
204                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
205                         if (taxfile == "not found") {  //if there is a current list file, use it
206                                 taxfile = m->getTaxonomyFile(); 
207                                 if (taxfile != "") {  m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
208                                 else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
209                         }
210                         else if (taxfile == "not open") { abort = true; }
211                         else { m->setTaxonomyFile(taxfile); }
212                         
213                         refTaxonomy = validParameter.validFile(parameters, "reftaxonomy", true);
214                         if (refTaxonomy == "not found") { refTaxonomy = ""; m->mothurOut("reftaxonomy is not required, but if given will keep the rankIDs in the summary file static."); m->mothurOutEndLine(); }
215                         else if (refTaxonomy == "not open") { abort = true; }
216         
217                         namefile = validParameter.validFile(parameters, "name", true);
218                         if (namefile == "not open") { namefile = ""; abort = true; }    
219                         else if (namefile == "not found") { namefile = ""; }
220                         else { m->setNameFile(namefile); }
221                         
222                         groupfile = validParameter.validFile(parameters, "group", true);
223                         if (groupfile == "not open") { abort = true; }  
224                         else if (groupfile == "not found") { groupfile = ""; }
225                         else { m->setGroupFile(groupfile); }
226             
227             countfile = validParameter.validFile(parameters, "count", true);
228                         if (countfile == "not open") { countfile = ""; abort = true; }
229                         else if (countfile == "not found") { countfile = "";  } 
230                         else { m->setCountTableFile(countfile); }
231             
232             if ((namefile != "") && (countfile != "")) {
233                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
234             }
235                         
236             if ((groupfile != "") && (countfile != "")) {
237                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
238             }
239
240                         
241                         //check for optional parameter and set defaults
242                         // ...at some point should added some additional type checking...
243                         label = validParameter.validFile(parameters, "label", false);                   
244                         if (label == "not found") { label = ""; allLines = 1;  }
245                         else { 
246                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
247                                 else { allLines = 1;  }
248                         }
249             
250                         basis = validParameter.validFile(parameters, "basis", false);
251                         if (basis == "not found") { basis = "otu"; }    
252                         
253                         if ((basis != "otu") && (basis != "sequence")) { m->mothurOut("Invalid option for basis. basis options are otu and sequence, using otu."); m->mothurOutEndLine(); }
254                         
255                         string temp = validParameter.validFile(parameters, "cutoff", false);                    if (temp == "not found") { temp = "51"; }
256                         m->mothurConvert(temp, cutoff); 
257                         
258                         temp = validParameter.validFile(parameters, "probs", false);                                    if (temp == "not found"){       temp = "true";                  }
259                         probs = m->isTrue(temp);
260             
261             temp = validParameter.validFile(parameters, "persample", false);            if (temp == "not found"){       temp = "f";             }
262                         persample = m->isTrue(temp);
263                         
264             if ((groupfile == "") && (countfile == "")) { if (persample) { m->mothurOut("persample is only valid with a group file, or count file with group information. Setting persample=f.\n"); persample = false; } 
265             }
266             if (countfile != "") {
267                 CountTable cts;
268                 if (!cts.testGroups(countfile)) { 
269                     if (persample) { m->mothurOut("persample is only valid with a group file, or count file with group information. Setting persample=f.\n"); persample = false; }
270                 }
271             }
272                         
273                         if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true;  }
274                         
275             if (countfile == "") {
276                 if (namefile == ""){
277                     vector<string> files; files.push_back(taxfile);
278                     parser.getNameFile(files);
279                 }
280             }
281                         
282                 }
283         }
284         catch(exception& e) {
285                 m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
286                 exit(1);
287         }
288 }
289 //**********************************************************************************************************************
290
291 int ClassifyOtuCommand::execute(){
292         try {
293         
294                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
295                 
296                 //if user gave a namesfile then use it
297                 if (namefile != "")     {       m->readNames(namefile, nameMap, true);  }
298         if (groupfile != "")    {   groupMap = new GroupMap(groupfile);  groupMap->readMap();  groups = groupMap->getNamesOfGroups(); }
299         else { groupMap = NULL;  }
300         if (countfile != "") {  ct = new CountTable(); ct->readTable(countfile);  if (ct->hasGroupInfo()) { groups = ct->getNamesOfGroups(); } }
301         else {  ct = NULL;    }
302         
303                 //read taxonomy file and save in map for easy access in building bin trees
304                 m->readTax(taxfile, taxMap);
305                 
306                 if (m->control_pressed) { return 0; }
307                 
308                 input = new InputData(listfile, "list");
309                 list = input->getListVector();
310                 string lastLabel = list->getLabel();
311
312                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
313                 set<string> processedLabels;
314                 set<string> userLabels = labels;
315                 
316                 if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  }  return 0; }
317         
318                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
319                         
320                         if (allLines == 1 || labels.count(list->getLabel()) == 1){
321                         
322                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
323                                         process(list);
324                                         if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete input; delete list; return 0; }
325                                                                                 
326                                         processedLabels.insert(list->getLabel());
327                                         userLabels.erase(list->getLabel());
328                         }
329                         
330                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
331                                         string saveLabel = list->getLabel();
332                                         
333                                         delete list;
334                                         list = input->getListVector(lastLabel);
335                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
336                                         process(list);
337                                 
338                                         
339                                         if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; }  if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
340                                                                                 
341                                         processedLabels.insert(list->getLabel());
342                                         userLabels.erase(list->getLabel());
343                                         
344                                         //restore real lastlabel to save below
345                                         list->setLabel(saveLabel);
346                         }
347                         
348                         lastLabel = list->getLabel();
349         
350                         delete list;
351                         list = input->getListVector();
352                 }
353                 
354                 //output error messages about any remaining user labels
355                 bool needToRun = false;
356                 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {  
357                         m->mothurOut("Your file does not include the label " + (*it)); 
358                         if (processedLabels.count(lastLabel) != 1) {
359                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
360                                 needToRun = true;
361                         }else {
362                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
363                         }
364                 }
365                 
366                 //run last label if you need to
367                 if (needToRun == true)  {
368                         if (list != NULL) {     delete list;    }
369                         list = input->getListVector(lastLabel);
370                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
371                         
372                         process(list);
373                         delete list;
374                         
375                         if (m->control_pressed) { outputTypes.clear();  if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
376                 }
377                 
378                 delete input;  
379         if (groupMap != NULL) { delete groupMap; }
380         if (ct != NULL) { delete ct; }
381                                 
382                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
383                 
384                 m->mothurOutEndLine();
385                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
386                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
387                 m->mothurOutEndLine();
388                 
389                 return 0;
390         }
391         catch(exception& e) {
392                 m->errorOut(e, "ClassifyOtuCommand", "execute");
393                 exit(1);
394         }
395 }
396 //**********************************************************************************************************************
397 vector<string> ClassifyOtuCommand::findConsensusTaxonomy(vector<string> names, int& size, string& conTax) {
398         try{
399                 conTax = "";
400                 vector<string> allNames;
401                 map<string, string>::iterator it;
402                 map<string, string>::iterator it2;
403
404                 //create a tree containing sequences from this bin
405                 PhyloTree* phylo = new PhyloTree();
406                 
407                 size = 0;
408                 for (int i = 0; i < names.size(); i++) {
409         
410                         //if namesfile include the names
411                         if (namefile != "") {
412         
413                                 //is this sequence in the name file - namemap maps seqName -> repSeqName
414                                 it2 = nameMap.find(names[i]);
415                                 
416                                 if (it2 == nameMap.end()) { //this name is not in name file, skip it
417                                         m->mothurOut(names[i] + " is not in your name file.  I will not include it in the consensus."); m->mothurOutEndLine();
418                                 }else{
419                                         
420                                         //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
421                                         it = taxMap.find(it2->second);
422                         
423                                         if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
424                                         
425                                                 if (names[i] != it2->second) { m->mothurOut(names[i] + " is represented by " +  it2->second + " and is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
426                                                 else {  m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
427                                         }else{
428                                 
429                                                 //add seq to tree
430                                                 phylo->addSeqToTree(names[i], it->second);
431                                                 size++;
432                                                 allNames.push_back(names[i]);
433                                         }
434                                 }
435                                 
436                         }else{
437                                 //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
438                                 it = taxMap.find(names[i]);
439                 
440                                 if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
441                                         m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine();
442                                 }else{
443                     if (countfile != "") {
444                         int numDups = ct->getNumSeqs(names[i]); 
445                         for (int j = 0; j < numDups; j++) {  phylo->addSeqToTree(names[i], it->second);  }
446                         size += numDups;
447                     }else{
448                                         //add seq to tree
449                         phylo->addSeqToTree(names[i], it->second);
450                         size++;  
451                     }
452                     allNames.push_back(names[i]);
453                                 }
454                         }
455
456                         
457                         if (m->control_pressed) { delete phylo; return allNames; }
458                         
459                 }
460                 
461                 //build tree
462                 phylo->assignHeirarchyIDs(0);
463                 
464                 TaxNode currentNode = phylo->get(0);
465                 int myLevel = 0;        
466                 //at each level
467                 while (currentNode.children.size() != 0) { //you still have more to explore
468                 
469                         TaxNode bestChild;
470                         int bestChildSize = 0;
471                         
472                         //go through children
473                         for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
474                                 
475                                 TaxNode temp = phylo->get(itChild->second);
476                                 
477                                 //select child with largest accesions - most seqs assigned to it
478                                 if (temp.accessions.size() > bestChildSize) {
479                                         bestChild = phylo->get(itChild->second);
480                                         bestChildSize = temp.accessions.size();
481                                 }
482                                 
483                         }
484             
485             //phylotree adds an extra unknown so we want to remove that
486             if (bestChild.name == "unknown") { bestChildSize--; }
487                                 
488                         //is this taxonomy above cutoff
489                         int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
490                         
491                         if (consensusConfidence >= cutoff) { //if yes, add it
492                                 if (probs) {
493                                         conTax += bestChild.name + "(" + toString(consensusConfidence) + ");";
494                                 }else{
495                                         conTax += bestChild.name + ";";
496                                 }
497                                 myLevel++;
498                         }else{ //if no, quit
499                                 break;
500                         }
501                         
502                         //move down a level
503                         currentNode = bestChild;
504                 }
505                 
506                 if (myLevel != phylo->getMaxLevel()) {
507                         while (myLevel != phylo->getMaxLevel()) {
508                                 conTax += "unclassified;";
509                                 myLevel++;
510                         }
511                 }               
512                 if (conTax == "") {  conTax = "no_consensus;";  }
513                 
514                 delete phylo;   
515                 
516                 return allNames;
517                         
518         }
519         catch(exception& e) {
520                 m->errorOut(e, "ClassifyOtuCommand", "findConsensusTaxonomy");
521                 exit(1);
522         }
523 }
524
525 //**********************************************************************************************************************
526 int ClassifyOtuCommand::process(ListVector* processList) {
527         try{
528                 string conTax;
529                 int size;
530                 
531                 //create output file
532                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
533                                 
534                 ofstream out;
535                 string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." +getOutputFileNameTag("constaxonomy");
536                 m->openOutputFile(outputFile, out);
537                 outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
538                 
539                 ofstream outSum;
540                 string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." +getOutputFileNameTag("taxsummary");
541                 m->openOutputFile(outputSumFile, outSum);
542                 outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile);
543                 
544                 out << "OTU\tSize\tTaxonomy" << endl;
545                 
546                 PhyloSummary* taxaSum;
547         if (countfile != "") {
548             if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, ct);  }
549             else {  taxaSum = new PhyloSummary(ct); }
550                 }else {
551             if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, groupMap);  }
552             else {  taxaSum = new PhyloSummary(groupMap); }
553         }
554         
555         vector<ofstream*> outSums;
556         vector<ofstream*> outs;
557         vector<PhyloSummary*> taxaSums;
558         map<string, int> groupIndex;
559         if (persample) {
560             for (int i = 0; i < groups.size(); i++) {
561                 groupIndex[groups[i]] = i;
562                 ofstream* temp = new ofstream();
563                 string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + groups[i] + "." +getOutputFileNameTag("constaxonomy");
564                 m->openOutputFile(outputFile, *temp);
565                 (*temp) << "OTU\tSize\tTaxonomy" << endl;
566                 outs.push_back(temp);
567                 outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
568                 
569                 ofstream* tempSum = new ofstream();
570                 string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + groups[i] + "." +getOutputFileNameTag("taxsummary");
571                 m->openOutputFile(outputSumFile, *tempSum);
572                 outSums.push_back(tempSum);
573                 outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile);
574                 
575                 PhyloSummary* taxaSumt;
576                 if (countfile != "") {
577                     if (refTaxonomy != "") { taxaSumt = new PhyloSummary(refTaxonomy, ct);  }
578                     else {  taxaSumt = new PhyloSummary(ct); }
579                 }else {
580                     if (refTaxonomy != "") { taxaSumt = new PhyloSummary(refTaxonomy, groupMap);  }
581                     else {  taxaSumt = new PhyloSummary(groupMap); }
582                 }
583                 taxaSums.push_back(taxaSumt);
584             }
585         }
586         
587                 //for each bin in the list vector
588         string snumBins = toString(processList->getNumBins());
589                 for (int i = 0; i < processList->getNumBins(); i++) {
590                         
591                         if (m->control_pressed) { break; }
592                         
593                         vector<string> names;
594             string binnames = processList->get(i);
595             vector<string> thisNames;
596             m->splitAtComma(binnames, thisNames);
597             
598                         names = findConsensusTaxonomy(thisNames, size, conTax);
599                 
600                         if (m->control_pressed) { break; }
601                         
602                         //output to new names file
603             string binLabel = "Otu";
604             string sbinNumber = toString(i+1);
605             if (sbinNumber.length() < snumBins.length()) { 
606                 int diff = snumBins.length() - sbinNumber.length();
607                 for (int h = 0; h < diff; h++) { binLabel += "0"; }
608             }
609             binLabel += sbinNumber;
610
611                         out << binLabel << '\t' << size << '\t' << conTax << endl;
612                         
613                         string noConfidenceConTax = conTax;
614                         m->removeConfidences(noConfidenceConTax);
615                         
616                         //add this bins taxonomy to summary
617                         if (basis == "sequence") {
618                                 for(int j = 0; j < names.size(); j++) {  
619                     int numReps = 1;
620                     if (countfile != "") {  numReps = ct->getNumSeqs(names[j]); }
621                     for(int k = 0; k < numReps; k++) {  taxaSum->addSeqToTree(names[j], noConfidenceConTax);  }
622                 }
623                         }else { //otu
624                 map<string, bool> containsGroup; 
625                 if (countfile != "") {
626                     if (ct->hasGroupInfo()) {
627                         vector<string> mGroups = ct->getNamesOfGroups();
628                         for (int k = 0; k < names.size(); k++) {
629                             vector<int> counts = ct->getGroupCounts(names[k]);
630                             for (int h = 0; h < counts.size(); h++) {  
631                                 if (counts[h] != 0) {  containsGroup[mGroups[h]] = true; }
632                             }
633                         }
634                     }
635                 }else {
636                     if (groupfile != "") {
637                         vector<string> mGroups = groupMap->getNamesOfGroups();
638                         for (int j = 0; j < mGroups.size(); j++) { containsGroup[mGroups[j]] = false; }
639                         
640                         for (int k = 0; k < names.size(); k++) {
641                             //find out the sequences group
642                             string group = groupMap->getGroup(names[k]);
643                             
644                             if (group == "not found") {  m->mothurOut("[WARNING]: " + names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine();  }
645                             else {
646                                 containsGroup[group] = true;
647                             }
648                         }
649                     }
650                 }
651                                 taxaSum->addSeqToTree(noConfidenceConTax, containsGroup);
652                         }
653             
654             
655             if (persample) {
656                 //divide names by group
657                 map<string, vector<string> > parsedNames;
658                 map<string, vector<string> >::iterator itParsed;
659                 
660                 //parse names by group
661                 for (int j = 0; j < names.size(); j++) {
662                     if (groupfile != "") { 
663                         string group = groupMap->getGroup(names[j]); 
664                         itParsed = parsedNames.find(group);
665                         
666                         if (itParsed != parsedNames.end()) { itParsed->second.push_back(names[j]); }
667                         else { vector<string> tempNames; tempNames.push_back(names[j]); parsedNames[group] = tempNames; }
668                     }else { //count file was used
669                         vector<string> thisSeqsGroups = ct->getGroups(names[j]);
670                         for (int k = 0; k < thisSeqsGroups.size(); k++) {
671                             string group = thisSeqsGroups[k]; 
672                             itParsed = parsedNames.find(group);
673                             
674                             if (itParsed != parsedNames.end()) { itParsed->second.push_back(names[j]); }
675                             else { vector<string> tempNames; tempNames.push_back(names[j]); parsedNames[group] = tempNames; }
676                         }
677                     }
678                 }
679                 
680                 for (itParsed = parsedNames.begin(); itParsed != parsedNames.end(); itParsed++) {
681                     vector<string> theseNames = findConsensusTaxonomy(itParsed->second, size, conTax);
682                     
683                     if (m->control_pressed) { break; }
684                     
685                     //output to new names file
686                     string binLabel = "Otu";
687                     string sbinNumber = toString(i+1);
688                     if (sbinNumber.length() < snumBins.length()) { 
689                         int diff = snumBins.length() - sbinNumber.length();
690                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
691                     }
692                     binLabel += sbinNumber;
693                     
694                     (*outs[groupIndex[itParsed->first]]) << binLabel << '\t' << size << '\t' << conTax << endl;
695                     
696                     string noConfidenceConTax = conTax;
697                     m->removeConfidences(noConfidenceConTax);
698                     
699                     //add this bins taxonomy to summary
700                     if (basis == "sequence") {
701                         for(int j = 0; j < theseNames.size(); j++) {  
702                             int numReps = 1;
703                             if (countfile != "") {  numReps = ct->getGroupCount(theseNames[j], itParsed->first); } //get num seqs for this seq from this group
704                             for(int k = 0; k < numReps; k++) {  (taxaSums[groupIndex[itParsed->first]])->addSeqToTree(theseNames[j], noConfidenceConTax);  }
705                         }
706                     }else { //otu
707                         map<string, bool> containsGroup; 
708                         containsGroup[itParsed->first] = true;
709                         (taxaSums[groupIndex[itParsed->first]])->addSeqToTree(noConfidenceConTax, containsGroup);
710                     }
711                 }
712             }
713                 }
714
715                 out.close();
716                 
717                 //print summary file
718                 taxaSum->print(outSum);
719                 outSum.close();
720         
721         if (persample) {
722             for (int i = 0; i < groups.size(); i++) {
723                 (*outs[i]).close();
724                 taxaSums[i]->print(*outSums[i]);
725                 (*outSums[i]).close();
726                 delete outs[i];
727                 delete outSums[i];
728                 delete taxaSums[i];
729             }
730         }
731                 
732                 delete taxaSum;
733                 
734                 return 0;
735
736         }
737         catch(exception& e) {
738                 m->errorOut(e, "ClassifyOtuCommand", "process");
739                 exit(1);
740         }
741 }
742 /**************************************************************************************************/
743 string ClassifyOtuCommand::addUnclassifieds(string tax, int maxlevel) {
744         try{
745                 string newTax, taxon;
746                 int level = 0;
747                 
748                 //keep what you have counting the levels
749                 while (tax.find_first_of(';') != -1) {
750                         //get taxon
751                         taxon = tax.substr(0,tax.find_first_of(';'))+';';
752                         tax = tax.substr(tax.find_first_of(';')+1, tax.length());
753                         newTax += taxon;
754                         level++;
755                 }
756                 
757                 //add "unclassified" until you reach maxLevel
758                 while (level < maxlevel) {
759                         newTax += "unclassified;";
760                         level++;
761                 }
762                 
763                 return newTax;
764         }
765         catch(exception& e) {
766                 m->errorOut(e, "ClassifyOtuCommand", "addUnclassifieds");
767                 exit(1);
768         }
769 }
770 //**********************************************************************************************************************
771
772