]> git.donarmstrong.com Git - mothur.git/blob - classifyotucommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / classifyotucommand.cpp
1 /*
2  *  classifyotucommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 6/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "classifyotucommand.h"
11 #include "phylotree.h"
12 #include "phylosummary.h"
13
14 //**********************************************************************************************************************
15 vector<string> ClassifyOtuCommand::setParameters(){     
16         try {
17                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
18                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy);
19                 CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(preftaxonomy);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
22                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
23                 CommandParameter pbasis("basis", "Multiple", "otu-sequence", "otu", "", "", "",false,false); parameters.push_back(pbasis);
24                 CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "",false,true); parameters.push_back(pcutoff);
25                 CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pprobs);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "ClassifyOtuCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string ClassifyOtuCommand::getHelpString(){     
40         try {
41                 string helpString = "";
42                 helpString += "The classify.otu command parameters are list, taxonomy, reftaxonomy, name, group, cutoff, label, basis and probs.  The taxonomy and list parameters are required unless you have a valid current file.\n";
43                 helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. Providing it will keep the rankIDs in the summary file static.\n";
44                 helpString += "The name parameter allows you add a names file with your taxonomy file.\n";
45                 helpString += "The group parameter allows you provide a group file to use in creating the summary file breakdown.\n";
46                 helpString += "The basis parameter allows you indicate what you want the summary file to represent, options are otu and sequence. Default is otu.\n";
47                 helpString += "For example consider the following basis=sequence could give Clostridiales       3       105     16      43      46, where 105 is the total number of sequences whose otu classified to Clostridiales.\n";
48                 helpString += "16 is the number of sequences in the otus from groupA, 43 is the number of sequences in the otus from groupB, and 46 is the number of sequences in the otus from groupC.\n";
49                 helpString += "Now for basis=otu could give Clostridiales       3       7       6       1       2, where 7 is the number of otus that classified to Clostridiales.\n";
50                 helpString += "6 is the number of otus containing sequences from groupA, 1 is the number of otus containing sequences from groupB, and 2 is the number of otus containing sequences from groupC.\n";
51                 helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n";
52                 helpString += "The default value for label is all labels in your inputfile.\n";
53                 helpString += "The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy.  The default is 51, meaning 51%. Cutoff cannot be below 51.\n";
54                 helpString += "The probs parameter shuts off the outputting of the consensus confidence results. The default is true, meaning you want the confidence to be shown.\n";
55                 helpString += "The classify.otu command should be in the following format: classify.otu(taxonomy=yourTaxonomyFile, list=yourListFile, name=yourNamesFile, label=yourLabels).\n";
56                 helpString += "Example classify.otu(taxonomy=abrecovery.silva.full.taxonomy, list=abrecovery.fn.list, label=0.10).\n";
57                 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n";
58                 return helpString;
59         }
60         catch(exception& e) {
61                 m->errorOut(e, "ClassifyOtuCommand", "getHelpString");
62                 exit(1);
63         }
64 }
65 //**********************************************************************************************************************
66 string ClassifyOtuCommand::getOutputFileNameTag(string type, string inputName=""){      
67         try {
68         string outputFileName = "";
69                 map<string, vector<string> >::iterator it;
70         
71         //is this a type this command creates
72         it = outputTypes.find(type);
73         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
74         else {
75             if (type == "constaxonomy") {  outputFileName =  "cons.taxonomy"; }
76             else if (type == "taxsummary") {  outputFileName =  "cons.tax.summary"; }
77             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
78         }
79         return outputFileName;
80         }
81         catch(exception& e) {
82                 m->errorOut(e, "ClassifyOtuCommand", "getOutputFileNameTag");
83                 exit(1);
84         }
85 }
86 //**********************************************************************************************************************
87 ClassifyOtuCommand::ClassifyOtuCommand(){       
88         try {
89                 abort = true; calledHelp = true; 
90                 setParameters();
91                 vector<string> tempOutNames;
92                 outputTypes["constaxonomy"] = tempOutNames;
93                 outputTypes["taxsummary"] = tempOutNames;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
97                 exit(1);
98         }
99 }
100
101 //**********************************************************************************************************************
102 ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
103         try{
104                 abort = false; calledHelp = false;   
105                 allLines = 1;
106                 labels.clear();
107                                 
108                 //allow user to run help
109                 if (option == "help") { 
110                         help(); abort = true; calledHelp = true;
111                 }else if(option == "citation") { citation(); abort = true; calledHelp = true;} 
112                 else {
113                         vector<string> myArray = setParameters();
114                         
115                         OptionParser parser(option);
116                         map<string, string> parameters = parser.getParameters();
117                         
118                         ValidParameters validParameter;
119                         map<string, string>::iterator it;
120                 
121                         //check to make sure all parameters are valid for command
122                         for (it = parameters.begin(); it != parameters.end(); it++) { 
123                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
124                         }
125                         
126                         //initialize outputTypes
127                         vector<string> tempOutNames;
128                         outputTypes["constaxonomy"] = tempOutNames;
129                         outputTypes["taxsummary"] = tempOutNames;
130                 
131                         //if the user changes the input directory command factory will send this info to us in the output parameter 
132                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
133                         if (inputDir == "not found"){   inputDir = "";          }
134                         else {
135                                 string path;
136                                 it = parameters.find("list");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
142                                 }
143                                 
144                                 it = parameters.find("name");
145                                 //user has given a template file
146                                 if(it != parameters.end()){ 
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
150                                 }
151                                 
152                                 it = parameters.find("taxonomy");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
158                                 }
159                                 
160                                 it = parameters.find("reftaxonomy");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["reftaxonomy"] = inputDir + it->second;              }
166                                 }
167                                 
168                                 it = parameters.find("group");
169                                 //user has given a template file
170                                 if(it != parameters.end()){ 
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
174                                 }
175                         }
176
177                         
178                         //if the user changes the output directory command factory will send this info to us in the output parameter 
179                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
180                         
181                         //check for required parameters
182                         listfile = validParameter.validFile(parameters, "list", true);
183                         if (listfile == "not found") {                          
184                                 //if there is a current list file, use it
185                                 listfile = m->getListFile(); 
186                                 if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
187                                 else {  m->mothurOut("You have no current listfile and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
188                         }
189                         else if (listfile == "not open") { abort = true; }      
190                         else { m->setListFile(listfile); }
191                         
192                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
193                         if (taxfile == "not found") {  //if there is a current list file, use it
194                                 taxfile = m->getTaxonomyFile(); 
195                                 if (taxfile != "") {  m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
196                                 else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
197                         }
198                         else if (taxfile == "not open") { abort = true; }
199                         else { m->setTaxonomyFile(taxfile); }
200                         
201                         refTaxonomy = validParameter.validFile(parameters, "reftaxonomy", true);
202                         if (refTaxonomy == "not found") { refTaxonomy = ""; m->mothurOut("reftaxonomy is not required, but if given will keep the rankIDs in the summary file static."); m->mothurOutEndLine(); }
203                         else if (refTaxonomy == "not open") { abort = true; }
204         
205                         namefile = validParameter.validFile(parameters, "name", true);
206                         if (namefile == "not open") { namefile = ""; abort = true; }    
207                         else if (namefile == "not found") { namefile = ""; }
208                         else { m->setNameFile(namefile); }
209                         
210                         groupfile = validParameter.validFile(parameters, "group", true);
211                         if (groupfile == "not open") { abort = true; }  
212                         else if (groupfile == "not found") { groupfile = ""; }
213                         else { m->setGroupFile(groupfile); }
214                         
215                         //check for optional parameter and set defaults
216                         // ...at some point should added some additional type checking...
217                         label = validParameter.validFile(parameters, "label", false);                   
218                         if (label == "not found") { label = ""; allLines = 1;  }
219                         else { 
220                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
221                                 else { allLines = 1;  }
222                         }
223                         
224                         basis = validParameter.validFile(parameters, "basis", false);
225                         if (basis == "not found") { basis = "otu"; }    
226                         
227                         if ((basis != "otu") && (basis != "sequence")) { m->mothurOut("Invalid option for basis. basis options are otu and sequence, using otu."); m->mothurOutEndLine(); }
228                         
229                         string temp = validParameter.validFile(parameters, "cutoff", false);                    if (temp == "not found") { temp = "51"; }
230                         m->mothurConvert(temp, cutoff); 
231                         
232                         temp = validParameter.validFile(parameters, "probs", false);                                    if (temp == "not found"){       temp = "true";                  }
233                         probs = m->isTrue(temp);
234                         
235                         
236                         if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true;  }
237                         
238                         if (namefile == ""){
239                                 vector<string> files; files.push_back(taxfile);
240                                 parser.getNameFile(files);
241                         }
242                         
243                 }
244         }
245         catch(exception& e) {
246                 m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
247                 exit(1);
248         }
249 }
250 //**********************************************************************************************************************
251
252 int ClassifyOtuCommand::execute(){
253         try {
254         
255                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
256                 
257                 //if user gave a namesfile then use it
258                 if (namefile != "") {   m->readNames(namefile, nameMap, true);  }
259                 
260                 //read taxonomy file and save in map for easy access in building bin trees
261                 m->readTax(taxfile, taxMap);
262                 
263                 if (m->control_pressed) { return 0; }
264                 
265                 input = new InputData(listfile, "list");
266                 list = input->getListVector();
267                 string lastLabel = list->getLabel();
268
269                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
270                 set<string> processedLabels;
271                 set<string> userLabels = labels;
272                 
273                 if (m->control_pressed) { outputTypes.clear(); delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }  return 0; }
274         
275                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
276                         
277                         if (allLines == 1 || labels.count(list->getLabel()) == 1){
278                         
279                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
280                                         process(list);
281                                         if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
282                                                                                 
283                                         processedLabels.insert(list->getLabel());
284                                         userLabels.erase(list->getLabel());
285                         }
286                         
287                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
288                                         string saveLabel = list->getLabel();
289                                         
290                                         delete list;
291                                         list = input->getListVector(lastLabel);
292                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
293                                         process(list);
294                                 
295                                         
296                                         if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
297                                                                                 
298                                         processedLabels.insert(list->getLabel());
299                                         userLabels.erase(list->getLabel());
300                                         
301                                         //restore real lastlabel to save below
302                                         list->setLabel(saveLabel);
303                         }
304                         
305                         lastLabel = list->getLabel();
306         
307                         delete list;
308                         list = input->getListVector();
309                 }
310                 
311                 //output error messages about any remaining user labels
312                 bool needToRun = false;
313                 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {  
314                         m->mothurOut("Your file does not include the label " + (*it)); 
315                         if (processedLabels.count(lastLabel) != 1) {
316                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
317                                 needToRun = true;
318                         }else {
319                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
320                         }
321                 }
322                 
323                 //run last label if you need to
324                 if (needToRun == true)  {
325                         if (list != NULL) {     delete list;    }
326                         list = input->getListVector(lastLabel);
327                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
328                         
329                         process(list);
330                         delete list;
331                         
332                         if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
333                 }
334                 
335                 delete input;  
336                                 
337                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
338                 
339                 m->mothurOutEndLine();
340                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
341                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
342                 m->mothurOutEndLine();
343                 
344                 return 0;
345         }
346         catch(exception& e) {
347                 m->errorOut(e, "ClassifyOtuCommand", "execute");
348                 exit(1);
349         }
350 }
351 //**********************************************************************************************************************
352 vector<string> ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* thisList, int& size, string& conTax) {
353         try{
354                 conTax = "";
355                 vector<string> names;
356                 vector<string> allNames;
357                 map<string, string>::iterator it;
358                 map<string, string>::iterator it2;
359
360                 //parse names into vector
361                 string binnames = thisList->get(bin);
362                 m->splitAtComma(binnames, names);
363
364                 //create a tree containing sequences from this bin
365                 PhyloTree* phylo = new PhyloTree();
366                 
367                 size = 0;
368                 for (int i = 0; i < names.size(); i++) {
369         
370                         //if namesfile include the names
371                         if (namefile != "") {
372         
373                                 //is this sequence in the name file - namemap maps seqName -> repSeqName
374                                 it2 = nameMap.find(names[i]);
375                                 
376                                 if (it2 == nameMap.end()) { //this name is not in name file, skip it
377                                         m->mothurOut(names[i] + " is not in your name file.  I will not include it in the consensus."); m->mothurOutEndLine();
378                                 }else{
379                                         
380                                         //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
381                                         it = taxMap.find(it2->second);
382                         
383                                         if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
384                                         
385                                                 if (names[i] != it2->second) { m->mothurOut(names[i] + " is represented by " +  it2->second + " and is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
386                                                 else {  m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
387                                         }else{
388                                 
389                                                 //add seq to tree
390                                                 phylo->addSeqToTree(names[i], it->second);
391                                                 size++;
392                                                 allNames.push_back(names[i]);
393                                         }
394                                 }
395                                 
396                         }else{
397                                 //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
398                                 it = taxMap.find(names[i]);
399                 
400                                 if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
401                                         m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine();
402                                 }else{
403                                         //add seq to tree
404                                         phylo->addSeqToTree(names[i], it->second);
405                                         size++;
406                                         allNames.push_back(names[i]);
407                                 }
408                         }
409
410                         
411                         if (m->control_pressed) { delete phylo; return allNames; }
412                         
413                 }
414                 
415                 //build tree
416                 phylo->assignHeirarchyIDs(0);
417                 
418                 TaxNode currentNode = phylo->get(0);
419                 int myLevel = 0;        
420                 //at each level
421                 while (currentNode.children.size() != 0) { //you still have more to explore
422                 
423                         TaxNode bestChild;
424                         int bestChildSize = 0;
425                         
426                         //go through children
427                         for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
428                                 
429                                 TaxNode temp = phylo->get(itChild->second);
430                                 
431                                 //select child with largest accesions - most seqs assigned to it
432                                 if (temp.accessions.size() > bestChildSize) {
433                                         bestChild = phylo->get(itChild->second);
434                                         bestChildSize = temp.accessions.size();
435                                 }
436                                 
437                         }
438             
439             //phylotree adds an extra unknown so we want to remove that
440             if (bestChild.name == "unknown") { bestChildSize--; }
441                                 
442                         //is this taxonomy above cutoff
443                         int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
444                         
445                         if (consensusConfidence >= cutoff) { //if yes, add it
446                                 if (probs) {
447                                         conTax += bestChild.name + "(" + toString(consensusConfidence) + ");";
448                                 }else{
449                                         conTax += bestChild.name + ";";
450                                 }
451                                 myLevel++;
452                         }else{ //if no, quit
453                                 break;
454                         }
455                         
456                         //move down a level
457                         currentNode = bestChild;
458                 }
459                 
460                 if (myLevel != phylo->getMaxLevel()) {
461                         while (myLevel != phylo->getMaxLevel()) {
462                                 conTax += "unclassified;";
463                                 myLevel++;
464                         }
465                 }               
466                 if (conTax == "") {  conTax = "no_consensus;";  }
467                 
468                 delete phylo;   
469                 
470                 return allNames;
471                         
472         }
473         catch(exception& e) {
474                 m->errorOut(e, "ClassifyOtuCommand", "findConsensusTaxonomy");
475                 exit(1);
476         }
477 }
478
479 //**********************************************************************************************************************
480 int ClassifyOtuCommand::process(ListVector* processList) {
481         try{
482                 string conTax;
483                 int size;
484                 
485                 //create output file
486                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
487                                 
488                 ofstream out;
489                 string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + getOutputFileNameTag("constaxonomy");
490                 m->openOutputFile(outputFile, out);
491                 outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
492                 
493                 ofstream outSum;
494                 string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + getOutputFileNameTag("taxsummary");
495                 m->openOutputFile(outputSumFile, outSum);
496                 outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile);
497                 
498                 out << "OTU\tSize\tTaxonomy" << endl;
499                 
500                 PhyloSummary* taxaSum;
501                 if (refTaxonomy != "") {
502                         taxaSum = new PhyloSummary(refTaxonomy, groupfile);
503                 }else {
504                         taxaSum = new PhyloSummary(groupfile);
505                 }
506                 
507
508                 //for each bin in the list vector
509         string snumBins = toString(processList->getNumBins());
510                 for (int i = 0; i < processList->getNumBins(); i++) {
511                         
512                         if (m->control_pressed) { break; }
513                         
514                         vector<string> names;
515                         names = findConsensusTaxonomy(i, processList, size, conTax);
516                 
517                         if (m->control_pressed) { out.close();  return 0; }
518                         
519                         //output to new names file
520             string binLabel = "Otu";
521             string sbinNumber = toString(i+1);
522             if (sbinNumber.length() < snumBins.length()) { 
523                 int diff = snumBins.length() - sbinNumber.length();
524                 for (int h = 0; h < diff; h++) { binLabel += "0"; }
525             }
526             binLabel += sbinNumber;
527
528                         out << binLabel << '\t' << size << '\t' << conTax << endl;
529                         
530                         string noConfidenceConTax = conTax;
531                         m->removeConfidences(noConfidenceConTax);
532                         
533                         //add this bins taxonomy to summary
534                         if (basis == "sequence") {
535                                 for(int j = 0; j < names.size(); j++) {  taxaSum->addSeqToTree(names[j], noConfidenceConTax);  }
536                         }else { //otu
537                                 taxaSum->addSeqToTree(noConfidenceConTax, names);
538                         }
539                 }
540
541                 out.close();
542                 
543                 //print summary file
544                 taxaSum->print(outSum);
545                 outSum.close();
546                 
547                 delete taxaSum;
548                 
549                 return 0;
550
551         }
552         catch(exception& e) {
553                 m->errorOut(e, "ClassifyOtuCommand", "process");
554                 exit(1);
555         }
556 }
557 /**************************************************************************************************/
558 string ClassifyOtuCommand::addUnclassifieds(string tax, int maxlevel) {
559         try{
560                 string newTax, taxon;
561                 int level = 0;
562                 
563                 //keep what you have counting the levels
564                 while (tax.find_first_of(';') != -1) {
565                         //get taxon
566                         taxon = tax.substr(0,tax.find_first_of(';'))+';';
567                         tax = tax.substr(tax.find_first_of(';')+1, tax.length());
568                         newTax += taxon;
569                         level++;
570                 }
571                 
572                 //add "unclassified" until you reach maxLevel
573                 while (level < maxlevel) {
574                         newTax += "unclassified;";
575                         level++;
576                 }
577                 
578                 return newTax;
579         }
580         catch(exception& e) {
581                 m->errorOut(e, "ClassifyOtuCommand", "addUnclassifieds");
582                 exit(1);
583         }
584 }
585 //**********************************************************************************************************************
586
587