]> git.donarmstrong.com Git - mothur.git/blob - classifyotucommand.cpp
working on chimera.perseus. made removeConfidences function smarter. Fixed bug in...
[mothur.git] / classifyotucommand.cpp
1 /*
2  *  classifyotucommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 6/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "classifyotucommand.h"
11 #include "phylotree.h"
12 #include "phylosummary.h"
13
14 //**********************************************************************************************************************
15 vector<string> ClassifyOtuCommand::setParameters(){     
16         try {
17                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
18                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy);
19                 CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(preftaxonomy);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
22                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
23                 CommandParameter pbasis("basis", "Multiple", "otu-sequence", "otu", "", "", "",false,false); parameters.push_back(pbasis);
24                 CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "",false,true); parameters.push_back(pcutoff);
25                 CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pprobs);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "ClassifyOtuCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string ClassifyOtuCommand::getHelpString(){     
40         try {
41                 string helpString = "";
42                 helpString += "The classify.otu command parameters are list, taxonomy, reftaxonomy, name, group, cutoff, label, basis and probs.  The taxonomy and list parameters are required unless you have a valid current file.\n";
43                 helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. Providing it will keep the rankIDs in the summary file static.\n";
44                 helpString += "The name parameter allows you add a names file with your taxonomy file.\n";
45                 helpString += "The group parameter allows you provide a group file to use in creating the summary file breakdown.\n";
46                 helpString += "The basis parameter allows you indicate what you want the summary file to represent, options are otu and sequence. Default is otu.\n";
47                 helpString += "For example consider the following basis=sequence could give Clostridiales       3       105     16      43      46, where 105 is the total number of sequences whose otu classified to Clostridiales.\n";
48                 helpString += "16 is the number of sequences in the otus from groupA, 43 is the number of sequences in the otus from groupB, and 46 is the number of sequences in the otus from groupC.\n";
49                 helpString += "Now for basis=otu could give Clostridiales       3       7       6       1       2, where 7 is the number of otus that classified to Clostridiales.\n";
50                 helpString += "6 is the number of otus containing sequences from groupA, 1 is the number of otus containing sequences from groupB, and 2 is the number of otus containing sequences from groupC.\n";
51                 helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n";
52                 helpString += "The default value for label is all labels in your inputfile.\n";
53                 helpString += "The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy.  The default is 51, meaning 51%. Cutoff cannot be below 51.\n";
54                 helpString += "The probs parameter shuts off the outputting of the consensus confidence results. The default is true, meaning you want the confidence to be shown.\n";
55                 helpString += "The classify.otu command should be in the following format: classify.otu(taxonomy=yourTaxonomyFile, list=yourListFile, name=yourNamesFile, label=yourLabels).\n";
56                 helpString += "Example classify.otu(taxonomy=abrecovery.silva.full.taxonomy, list=abrecovery.fn.list, label=0.10).\n";
57                 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n";
58                 return helpString;
59         }
60         catch(exception& e) {
61                 m->errorOut(e, "ClassifyOtuCommand", "getHelpString");
62                 exit(1);
63         }
64 }
65 //**********************************************************************************************************************
66 ClassifyOtuCommand::ClassifyOtuCommand(){       
67         try {
68                 abort = true; calledHelp = true; 
69                 setParameters();
70                 vector<string> tempOutNames;
71                 outputTypes["constaxonomy"] = tempOutNames;
72                 outputTypes["taxsummary"] = tempOutNames;
73         }
74         catch(exception& e) {
75                 m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
76                 exit(1);
77         }
78 }
79
80 //**********************************************************************************************************************
81 ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
82         try{
83                 abort = false; calledHelp = false;   
84                 allLines = 1;
85                 labels.clear();
86                                 
87                 //allow user to run help
88                 if (option == "help") { 
89                         help(); abort = true; calledHelp = true;
90                 }else if(option == "citation") { citation(); abort = true; calledHelp = true;} 
91                 else {
92                         vector<string> myArray = setParameters();
93                         
94                         OptionParser parser(option);
95                         map<string, string> parameters = parser.getParameters();
96                         
97                         ValidParameters validParameter;
98                         map<string, string>::iterator it;
99                 
100                         //check to make sure all parameters are valid for command
101                         for (it = parameters.begin(); it != parameters.end(); it++) { 
102                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
103                         }
104                         
105                         //initialize outputTypes
106                         vector<string> tempOutNames;
107                         outputTypes["constaxonomy"] = tempOutNames;
108                         outputTypes["taxsummary"] = tempOutNames;
109                 
110                         //if the user changes the input directory command factory will send this info to us in the output parameter 
111                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
112                         if (inputDir == "not found"){   inputDir = "";          }
113                         else {
114                                 string path;
115                                 it = parameters.find("list");
116                                 //user has given a template file
117                                 if(it != parameters.end()){ 
118                                         path = m->hasPath(it->second);
119                                         //if the user has not given a path then, add inputdir. else leave path alone.
120                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
121                                 }
122                                 
123                                 it = parameters.find("name");
124                                 //user has given a template file
125                                 if(it != parameters.end()){ 
126                                         path = m->hasPath(it->second);
127                                         //if the user has not given a path then, add inputdir. else leave path alone.
128                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
129                                 }
130                                 
131                                 it = parameters.find("taxonomy");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
137                                 }
138                                 
139                                 it = parameters.find("reftaxonomy");
140                                 //user has given a template file
141                                 if(it != parameters.end()){ 
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["reftaxonomy"] = inputDir + it->second;              }
145                                 }
146                                 
147                                 it = parameters.find("group");
148                                 //user has given a template file
149                                 if(it != parameters.end()){ 
150                                         path = m->hasPath(it->second);
151                                         //if the user has not given a path then, add inputdir. else leave path alone.
152                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
153                                 }
154                         }
155
156                         
157                         //if the user changes the output directory command factory will send this info to us in the output parameter 
158                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
159                         
160                         //check for required parameters
161                         listfile = validParameter.validFile(parameters, "list", true);
162                         if (listfile == "not found") {                          
163                                 //if there is a current list file, use it
164                                 listfile = m->getListFile(); 
165                                 if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
166                                 else {  m->mothurOut("You have no current listfile and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
167                         }
168                         else if (listfile == "not open") { abort = true; }      
169                         else { m->setListFile(listfile); }
170                         
171                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
172                         if (taxfile == "not found") {  //if there is a current list file, use it
173                                 taxfile = m->getTaxonomyFile(); 
174                                 if (taxfile != "") {  m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
175                                 else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
176                         }
177                         else if (taxfile == "not open") { abort = true; }
178                         else { m->setTaxonomyFile(taxfile); }
179                         
180                         refTaxonomy = validParameter.validFile(parameters, "reftaxonomy", true);
181                         if (refTaxonomy == "not found") { refTaxonomy = ""; m->mothurOut("reftaxonomy is not required, but if given will keep the rankIDs in the summary file static."); m->mothurOutEndLine(); }
182                         else if (refTaxonomy == "not open") { abort = true; }
183         
184                         namefile = validParameter.validFile(parameters, "name", true);
185                         if (namefile == "not open") { abort = true; }   
186                         else if (namefile == "not found") { namefile = ""; }
187                         else { m->setNameFile(namefile); }
188                         
189                         groupfile = validParameter.validFile(parameters, "group", true);
190                         if (groupfile == "not open") { abort = true; }  
191                         else if (groupfile == "not found") { groupfile = ""; }
192                         else { m->setGroupFile(groupfile); }
193                         
194                         //check for optional parameter and set defaults
195                         // ...at some point should added some additional type checking...
196                         label = validParameter.validFile(parameters, "label", false);                   
197                         if (label == "not found") { label = ""; allLines = 1;  }
198                         else { 
199                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
200                                 else { allLines = 1;  }
201                         }
202                         
203                         basis = validParameter.validFile(parameters, "basis", false);
204                         if (basis == "not found") { basis = "otu"; }    
205                         
206                         if ((basis != "otu") && (basis != "sequence")) { m->mothurOut("Invalid option for basis. basis options are otu and sequence, using otu."); m->mothurOutEndLine(); }
207                         
208                         string temp = validParameter.validFile(parameters, "cutoff", false);                    if (temp == "not found") { temp = "51"; }
209                         convert(temp, cutoff); 
210                         
211                         temp = validParameter.validFile(parameters, "probs", false);                                    if (temp == "not found"){       temp = "true";                  }
212                         probs = m->isTrue(temp);
213                         
214                         
215                         if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true;  }
216                         
217                 }
218         }
219         catch(exception& e) {
220                 m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
221                 exit(1);
222         }
223 }
224 //**********************************************************************************************************************
225
226 int ClassifyOtuCommand::execute(){
227         try {
228         
229                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
230                 
231                 //if user gave a namesfile then use it
232                 if (namefile != "") {   readNamesFile();        }
233                 
234                 //read taxonomy file and save in map for easy access in building bin trees
235                 readTaxonomyFile();
236                 
237                 if (m->control_pressed) { return 0; }
238                 
239                 input = new InputData(listfile, "list");
240                 list = input->getListVector();
241                 string lastLabel = list->getLabel();
242
243                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
244                 set<string> processedLabels;
245                 set<string> userLabels = labels;
246                 
247                 if (m->control_pressed) { outputTypes.clear(); delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }  return 0; }
248         
249                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
250                         
251                         if (allLines == 1 || labels.count(list->getLabel()) == 1){
252                         
253                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
254                                         process(list);
255                                         if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
256                                                                                 
257                                         processedLabels.insert(list->getLabel());
258                                         userLabels.erase(list->getLabel());
259                         }
260                         
261                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
262                                         string saveLabel = list->getLabel();
263                                         
264                                         delete list;
265                                         list = input->getListVector(lastLabel);
266                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
267                                         process(list);
268                                 
269                                         
270                                         if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
271                                                                                 
272                                         processedLabels.insert(list->getLabel());
273                                         userLabels.erase(list->getLabel());
274                                         
275                                         //restore real lastlabel to save below
276                                         list->setLabel(saveLabel);
277                         }
278                         
279                         lastLabel = list->getLabel();
280         
281                         delete list;
282                         list = input->getListVector();
283                 }
284                 
285                 //output error messages about any remaining user labels
286                 bool needToRun = false;
287                 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {  
288                         m->mothurOut("Your file does not include the label " + (*it)); 
289                         if (processedLabels.count(lastLabel) != 1) {
290                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
291                                 needToRun = true;
292                         }else {
293                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
294                         }
295                 }
296                 
297                 //run last label if you need to
298                 if (needToRun == true)  {
299                         if (list != NULL) {     delete list;    }
300                         list = input->getListVector(lastLabel);
301                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
302                         
303                         process(list);
304                         delete list;
305                         
306                         if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
307                 }
308                 
309                 delete input;  
310                                 
311                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
312                 
313                 m->mothurOutEndLine();
314                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
315                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
316                 m->mothurOutEndLine();
317                 
318                 return 0;
319         }
320         catch(exception& e) {
321                 m->errorOut(e, "ClassifyOtuCommand", "execute");
322                 exit(1);
323         }
324 }
325
326 //**********************************************************************************************************************
327 int ClassifyOtuCommand::readNamesFile() {
328         try {
329                 
330                 ifstream inNames;
331                 m->openInputFile(namefile, inNames);
332                 
333                 string name, names;
334         
335                 while(!inNames.eof()){
336                         inNames >> name;                        //read from first column  A
337                         inNames >> names;               //read from second column  A,B,C,D
338                         m->gobble(inNames);
339                         
340                         //parse names into vector
341                         vector<string> theseNames;
342                         m->splitAtComma(names, theseNames);
343
344                         for (int i = 0; i < theseNames.size(); i++) {  nameMap[theseNames[i]] = name;  }
345                         
346                         if (m->control_pressed) { inNames.close(); nameMap.clear(); return 0; }
347                 }
348                 inNames.close();
349                 
350                 return 0;
351         }
352         catch(exception& e) {
353                 m->errorOut(e, "ClassifyOtuCommand", "readNamesFile");
354                 exit(1);
355         }
356 }
357 //**********************************************************************************************************************
358 int ClassifyOtuCommand::readTaxonomyFile() {
359         try {
360                 
361                 ifstream in;
362                 m->openInputFile(taxfile, in);
363                 
364                 string name, tax;
365         
366                 while(!in.eof()){
367                         in >> name >> tax;              
368                         m->gobble(in);
369                         
370                         //are there confidence scores, if so remove them
371                         if (tax.find_first_of('(') != -1) {  m->removeConfidences(tax); }
372                         
373                         taxMap[name] = tax;
374                         
375                         if (m->control_pressed) { in.close(); taxMap.clear(); return 0; }
376                 }
377                 in.close();
378                 
379                 return 0;
380         }
381         catch(exception& e) {
382                 m->errorOut(e, "ClassifyOtuCommand", "readTaxonomyFile");
383                 exit(1);
384         }
385 }
386 //**********************************************************************************************************************
387 vector<string> ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* thisList, int& size, string& conTax) {
388         try{
389                 conTax = "";
390                 vector<string> names;
391                 vector<string> allNames;
392                 map<string, string>::iterator it;
393                 map<string, string>::iterator it2;
394
395                 //parse names into vector
396                 string binnames = thisList->get(bin);
397                 m->splitAtComma(binnames, names);
398
399                 //create a tree containing sequences from this bin
400                 PhyloTree* phylo = new PhyloTree();
401                 
402                 size = 0;
403                 for (int i = 0; i < names.size(); i++) {
404         
405                         //if namesfile include the names
406                         if (namefile != "") {
407         
408                                 //is this sequence in the name file - namemap maps seqName -> repSeqName
409                                 it2 = nameMap.find(names[i]);
410                                 
411                                 if (it2 == nameMap.end()) { //this name is not in name file, skip it
412                                         m->mothurOut(names[i] + " is not in your name file.  I will not include it in the consensus."); m->mothurOutEndLine();
413                                 }else{
414                                         
415                                         //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
416                                         it = taxMap.find(it2->second);
417                         
418                                         if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
419                                         
420                                                 if (names[i] != it2->second) { m->mothurOut(names[i] + " is represented by " +  it2->second + " and is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
421                                                 else {  m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
422                                         }else{
423                                 
424                                                 //add seq to tree
425                                                 phylo->addSeqToTree(names[i], it->second);
426                                                 size++;
427                                                 allNames.push_back(names[i]);
428                                         }
429                                 }
430                                 
431                         }else{
432                                 //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
433                                 it = taxMap.find(names[i]);
434                 
435                                 if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
436                                         m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine();
437                                 }else{
438                                         //add seq to tree
439                                         phylo->addSeqToTree(names[i], it->second);
440                                         size++;
441                                         allNames.push_back(names[i]);
442                                 }
443                         }
444
445                         
446                         if (m->control_pressed) { delete phylo; return allNames; }
447                         
448                 }
449                 
450                 //build tree
451                 phylo->assignHeirarchyIDs(0);
452                 
453                 TaxNode currentNode = phylo->get(0);
454                 int myLevel = 0;        
455                 //at each level
456                 while (currentNode.children.size() != 0) { //you still have more to explore
457                 
458                         TaxNode bestChild;
459                         int bestChildSize = 0;
460                         
461                         //go through children
462                         for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
463                                 
464                                 TaxNode temp = phylo->get(itChild->second);
465                                 
466                                 //select child with largest accesions - most seqs assigned to it
467                                 if (temp.accessions.size() > bestChildSize) {
468                                         bestChild = phylo->get(itChild->second);
469                                         bestChildSize = temp.accessions.size();
470                                 }
471                                 
472                         }
473                                 
474                         //is this taxonomy above cutoff
475                         int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
476                         
477                         if (consensusConfidence >= cutoff) { //if yes, add it
478                                 if (probs) {
479                                         conTax += bestChild.name + "(" + toString(consensusConfidence) + ");";
480                                 }else{
481                                         conTax += bestChild.name + ";";
482                                 }
483                                 myLevel++;
484                         }else{ //if no, quit
485                                 break;
486                         }
487                         
488                         //move down a level
489                         currentNode = bestChild;
490                 }
491                 
492                 if (myLevel != phylo->getMaxLevel()) {
493                         while (myLevel != phylo->getMaxLevel()) {
494                                 conTax += "unclassified;";
495                                 myLevel++;
496                         }
497                 }               
498                 if (conTax == "") {  conTax = "no_consensus;";  }
499                 
500                 delete phylo;   
501                 
502                 return allNames;
503                         
504         }
505         catch(exception& e) {
506                 m->errorOut(e, "ClassifyOtuCommand", "findConsensusTaxonomy");
507                 exit(1);
508         }
509 }
510
511 //**********************************************************************************************************************
512 int ClassifyOtuCommand::process(ListVector* processList) {
513         try{
514                 string conTax;
515                 int size;
516                 
517                 //create output file
518                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
519                                 
520                 ofstream out;
521                 string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".cons.taxonomy";
522                 m->openOutputFile(outputFile, out);
523                 outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
524                 
525                 ofstream outSum;
526                 string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".cons.tax.summary";
527                 m->openOutputFile(outputSumFile, outSum);
528                 outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile);
529                 
530                 out << "OTU\tSize\tTaxonomy" << endl;
531                 
532                 PhyloSummary* taxaSum;
533                 if (refTaxonomy != "") {
534                         taxaSum = new PhyloSummary(refTaxonomy, groupfile);
535                 }else {
536                         taxaSum = new PhyloSummary(groupfile);
537                 }
538                 
539                 //for each bin in the list vector
540                 for (int i = 0; i < processList->getNumBins(); i++) {
541                         
542                         if (m->control_pressed) { break; }
543                         
544                         vector<string> names;
545                         names = findConsensusTaxonomy(i, processList, size, conTax);
546                 
547                         if (m->control_pressed) { out.close();  return 0; }
548                         
549                         //output to new names file
550                         out << (i+1) << '\t' << size << '\t' << conTax << endl;
551                         
552                         string noConfidenceConTax = conTax;
553                         m->removeConfidences(noConfidenceConTax);
554                         
555                         //add this bins taxonomy to summary
556                         if (basis == "sequence") {
557                                 for(int j = 0; j < names.size(); j++) {  taxaSum->addSeqToTree(names[j], noConfidenceConTax);  }
558                         }else { //otu
559                                 taxaSum->addSeqToTree(noConfidenceConTax, names);
560                         }
561                 }
562
563                 out.close();
564                 
565                 //print summary file
566                 taxaSum->print(outSum);
567                 outSum.close();
568                 
569                 delete taxaSum;
570                 
571                 return 0;
572
573         }
574         catch(exception& e) {
575                 m->errorOut(e, "ClassifyOtuCommand", "process");
576                 exit(1);
577         }
578 }
579 /**************************************************************************************************/
580 string ClassifyOtuCommand::addUnclassifieds(string tax, int maxlevel) {
581         try{
582                 string newTax, taxon;
583                 int level = 0;
584                 
585                 //keep what you have counting the levels
586                 while (tax.find_first_of(';') != -1) {
587                         //get taxon
588                         taxon = tax.substr(0,tax.find_first_of(';'))+';';
589                         tax = tax.substr(tax.find_first_of(';')+1, tax.length());
590                         newTax += taxon;
591                         level++;
592                 }
593                 
594                 //add "unclassified" until you reach maxLevel
595                 while (level < maxlevel) {
596                         newTax += "unclassified;";
597                         level++;
598                 }
599                 
600                 return newTax;
601         }
602         catch(exception& e) {
603                 m->errorOut(e, "ClassifyOtuCommand", "addUnclassifieds");
604                 exit(1);
605         }
606 }
607 //**********************************************************************************************************************
608
609