]> git.donarmstrong.com Git - mothur.git/blob - classifyotucommand.cpp
Revert to previous commit
[mothur.git] / classifyotucommand.cpp
1 /*
2  *  classifyotucommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 6/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "classifyotucommand.h"
11 #include "phylotree.h"
12 #include "phylosummary.h"
13
14 //**********************************************************************************************************************
15 vector<string> ClassifyOtuCommand::setParameters(){     
16         try {
17                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
18                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy);
19                 CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(preftaxonomy);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
22                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
23                 CommandParameter pbasis("basis", "Multiple", "otu-sequence", "otu", "", "", "",false,false); parameters.push_back(pbasis);
24                 CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "",false,true); parameters.push_back(pcutoff);
25                 CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pprobs);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "ClassifyOtuCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string ClassifyOtuCommand::getHelpString(){     
40         try {
41                 string helpString = "";
42                 helpString += "The classify.otu command parameters are list, taxonomy, reftaxonomy, name, group, cutoff, label, basis and probs.  The taxonomy and list parameters are required unless you have a valid current file.\n";
43                 helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. Providing it will keep the rankIDs in the summary file static.\n";
44                 helpString += "The name parameter allows you add a names file with your taxonomy file.\n";
45                 helpString += "The group parameter allows you provide a group file to use in creating the summary file breakdown.\n";
46                 helpString += "The basis parameter allows you indicate what you want the summary file to represent, options are otu and sequence. Default is otu.\n";
47                 helpString += "For example consider the following basis=sequence could give Clostridiales       3       105     16      43      46, where 105 is the total number of sequences whose otu classified to Clostridiales.\n";
48                 helpString += "16 is the number of sequences in the otus from groupA, 43 is the number of sequences in the otus from groupB, and 46 is the number of sequences in the otus from groupC.\n";
49                 helpString += "Now for basis=otu could give Clostridiales       3       7       6       1       2, where 7 is the number of otus that classified to Clostridiales.\n";
50                 helpString += "6 is the number of otus containing sequences from groupA, 1 is the number of otus containing sequences from groupB, and 2 is the number of otus containing sequences from groupC.\n";
51                 helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n";
52                 helpString += "The default value for label is all labels in your inputfile.\n";
53                 helpString += "The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy.  The default is 51, meaning 51%. Cutoff cannot be below 51.\n";
54                 helpString += "The probs parameter shuts off the outputting of the consensus confidence results. The default is true, meaning you want the confidence to be shown.\n";
55                 helpString += "The classify.otu command should be in the following format: classify.otu(taxonomy=yourTaxonomyFile, list=yourListFile, name=yourNamesFile, label=yourLabels).\n";
56                 helpString += "Example classify.otu(taxonomy=abrecovery.silva.full.taxonomy, list=abrecovery.fn.list, label=0.10).\n";
57                 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n";
58                 return helpString;
59         }
60         catch(exception& e) {
61                 m->errorOut(e, "ClassifyOtuCommand", "getHelpString");
62                 exit(1);
63         }
64 }
65 //**********************************************************************************************************************
66 ClassifyOtuCommand::ClassifyOtuCommand(){       
67         try {
68                 abort = true; calledHelp = true; 
69                 setParameters();
70                 vector<string> tempOutNames;
71                 outputTypes["constaxonomy"] = tempOutNames;
72                 outputTypes["taxsummary"] = tempOutNames;
73         }
74         catch(exception& e) {
75                 m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
76                 exit(1);
77         }
78 }
79
80 //**********************************************************************************************************************
81 ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
82         try{
83                 abort = false; calledHelp = false;   
84                 allLines = 1;
85                 labels.clear();
86                                 
87                 //allow user to run help
88                 if (option == "help") { 
89                         help(); abort = true; calledHelp = true;
90                 }else if(option == "citation") { citation(); abort = true; calledHelp = true;} 
91                 else {
92                         vector<string> myArray = setParameters();
93                         
94                         OptionParser parser(option);
95                         map<string, string> parameters = parser.getParameters();
96                         
97                         ValidParameters validParameter;
98                         map<string, string>::iterator it;
99                 
100                         //check to make sure all parameters are valid for command
101                         for (it = parameters.begin(); it != parameters.end(); it++) { 
102                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
103                         }
104                         
105                         //initialize outputTypes
106                         vector<string> tempOutNames;
107                         outputTypes["constaxonomy"] = tempOutNames;
108                         outputTypes["taxsummary"] = tempOutNames;
109                 
110                         //if the user changes the input directory command factory will send this info to us in the output parameter 
111                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
112                         if (inputDir == "not found"){   inputDir = "";          }
113                         else {
114                                 string path;
115                                 it = parameters.find("list");
116                                 //user has given a template file
117                                 if(it != parameters.end()){ 
118                                         path = m->hasPath(it->second);
119                                         //if the user has not given a path then, add inputdir. else leave path alone.
120                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
121                                 }
122                                 
123                                 it = parameters.find("name");
124                                 //user has given a template file
125                                 if(it != parameters.end()){ 
126                                         path = m->hasPath(it->second);
127                                         //if the user has not given a path then, add inputdir. else leave path alone.
128                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
129                                 }
130                                 
131                                 it = parameters.find("taxonomy");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
137                                 }
138                                 
139                                 it = parameters.find("reftaxonomy");
140                                 //user has given a template file
141                                 if(it != parameters.end()){ 
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["reftaxonomy"] = inputDir + it->second;              }
145                                 }
146                                 
147                                 it = parameters.find("group");
148                                 //user has given a template file
149                                 if(it != parameters.end()){ 
150                                         path = m->hasPath(it->second);
151                                         //if the user has not given a path then, add inputdir. else leave path alone.
152                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
153                                 }
154                         }
155
156                         
157                         //if the user changes the output directory command factory will send this info to us in the output parameter 
158                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
159                         
160                         //check for required parameters
161                         listfile = validParameter.validFile(parameters, "list", true);
162                         if (listfile == "not found") {                          
163                                 //if there is a current list file, use it
164                                 listfile = m->getListFile(); 
165                                 if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
166                                 else {  m->mothurOut("You have no current listfile and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
167                         }
168                         else if (listfile == "not open") { abort = true; }      
169                         else { m->setListFile(listfile); }
170                         
171                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
172                         if (taxfile == "not found") {  //if there is a current list file, use it
173                                 taxfile = m->getTaxonomyFile(); 
174                                 if (taxfile != "") {  m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
175                                 else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
176                         }
177                         else if (taxfile == "not open") { abort = true; }
178                         else { m->setTaxonomyFile(taxfile); }
179                         
180                         refTaxonomy = validParameter.validFile(parameters, "reftaxonomy", true);
181                         if (refTaxonomy == "not found") { refTaxonomy = ""; m->mothurOut("reftaxonomy is not required, but if given will keep the rankIDs in the summary file static."); m->mothurOutEndLine(); }
182                         else if (refTaxonomy == "not open") { abort = true; }
183         
184                         namefile = validParameter.validFile(parameters, "name", true);
185                         if (namefile == "not open") { namefile = ""; abort = true; }    
186                         else if (namefile == "not found") { namefile = ""; }
187                         else { m->setNameFile(namefile); }
188                         
189                         groupfile = validParameter.validFile(parameters, "group", true);
190                         if (groupfile == "not open") { abort = true; }  
191                         else if (groupfile == "not found") { groupfile = ""; }
192                         else { m->setGroupFile(groupfile); }
193                         
194                         //check for optional parameter and set defaults
195                         // ...at some point should added some additional type checking...
196                         label = validParameter.validFile(parameters, "label", false);                   
197                         if (label == "not found") { label = ""; allLines = 1;  }
198                         else { 
199                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
200                                 else { allLines = 1;  }
201                         }
202                         
203                         basis = validParameter.validFile(parameters, "basis", false);
204                         if (basis == "not found") { basis = "otu"; }    
205                         
206                         if ((basis != "otu") && (basis != "sequence")) { m->mothurOut("Invalid option for basis. basis options are otu and sequence, using otu."); m->mothurOutEndLine(); }
207                         
208                         string temp = validParameter.validFile(parameters, "cutoff", false);                    if (temp == "not found") { temp = "51"; }
209                         m->mothurConvert(temp, cutoff); 
210                         
211                         temp = validParameter.validFile(parameters, "probs", false);                                    if (temp == "not found"){       temp = "true";                  }
212                         probs = m->isTrue(temp);
213                         
214                         
215                         if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true;  }
216                         
217                         if (namefile == ""){
218                                 vector<string> files; files.push_back(taxfile);
219                                 parser.getNameFile(files);
220                         }
221                         
222                 }
223         }
224         catch(exception& e) {
225                 m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
226                 exit(1);
227         }
228 }
229 //**********************************************************************************************************************
230
231 int ClassifyOtuCommand::execute(){
232         try {
233         
234                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
235                 
236                 //if user gave a namesfile then use it
237                 if (namefile != "") {   readNamesFile();        }
238                 
239                 //read taxonomy file and save in map for easy access in building bin trees
240                 readTaxonomyFile();
241                 
242                 if (m->control_pressed) { return 0; }
243                 
244                 input = new InputData(listfile, "list");
245                 list = input->getListVector();
246                 string lastLabel = list->getLabel();
247
248                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
249                 set<string> processedLabels;
250                 set<string> userLabels = labels;
251                 
252                 if (m->control_pressed) { outputTypes.clear(); delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }  return 0; }
253         
254                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
255                         
256                         if (allLines == 1 || labels.count(list->getLabel()) == 1){
257                         
258                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
259                                         process(list);
260                                         if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
261                                                                                 
262                                         processedLabels.insert(list->getLabel());
263                                         userLabels.erase(list->getLabel());
264                         }
265                         
266                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
267                                         string saveLabel = list->getLabel();
268                                         
269                                         delete list;
270                                         list = input->getListVector(lastLabel);
271                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
272                                         process(list);
273                                 
274                                         
275                                         if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
276                                                                                 
277                                         processedLabels.insert(list->getLabel());
278                                         userLabels.erase(list->getLabel());
279                                         
280                                         //restore real lastlabel to save below
281                                         list->setLabel(saveLabel);
282                         }
283                         
284                         lastLabel = list->getLabel();
285         
286                         delete list;
287                         list = input->getListVector();
288                 }
289                 
290                 //output error messages about any remaining user labels
291                 bool needToRun = false;
292                 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {  
293                         m->mothurOut("Your file does not include the label " + (*it)); 
294                         if (processedLabels.count(lastLabel) != 1) {
295                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
296                                 needToRun = true;
297                         }else {
298                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
299                         }
300                 }
301                 
302                 //run last label if you need to
303                 if (needToRun == true)  {
304                         if (list != NULL) {     delete list;    }
305                         list = input->getListVector(lastLabel);
306                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
307                         
308                         process(list);
309                         delete list;
310                         
311                         if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
312                 }
313                 
314                 delete input;  
315                                 
316                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
317                 
318                 m->mothurOutEndLine();
319                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
320                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
321                 m->mothurOutEndLine();
322                 
323                 return 0;
324         }
325         catch(exception& e) {
326                 m->errorOut(e, "ClassifyOtuCommand", "execute");
327                 exit(1);
328         }
329 }
330
331 //**********************************************************************************************************************
332 int ClassifyOtuCommand::readNamesFile() {
333         try {
334                 
335                 ifstream inNames;
336                 m->openInputFile(namefile, inNames);
337                 
338                 string name, names;
339         
340                 while(!inNames.eof()){
341                         inNames >> name;                        //read from first column  A
342                         inNames >> names;               //read from second column  A,B,C,D
343                         m->gobble(inNames);
344                         
345                         //parse names into vector
346                         vector<string> theseNames;
347                         m->splitAtComma(names, theseNames);
348
349                         for (int i = 0; i < theseNames.size(); i++) {  nameMap[theseNames[i]] = name;  }
350                         
351                         if (m->control_pressed) { inNames.close(); nameMap.clear(); return 0; }
352                 }
353                 inNames.close();
354                 
355                 return 0;
356         }
357         catch(exception& e) {
358                 m->errorOut(e, "ClassifyOtuCommand", "readNamesFile");
359                 exit(1);
360         }
361 }
362 //**********************************************************************************************************************
363 int ClassifyOtuCommand::readTaxonomyFile() {
364         try {
365                 
366                 ifstream in;
367                 m->openInputFile(taxfile, in);
368                 
369                 string name, tax;
370         
371                 while(!in.eof()){
372                         in >> name >> tax;              
373                         m->gobble(in);
374                         
375                         //are there confidence scores, if so remove them
376                         if (tax.find_first_of('(') != -1) {  m->removeConfidences(tax); }
377                         
378                         taxMap[name] = tax;
379                         
380                         if (m->control_pressed) { in.close(); taxMap.clear(); return 0; }
381                 }
382                 in.close();
383                 
384                 return 0;
385         }
386         catch(exception& e) {
387                 m->errorOut(e, "ClassifyOtuCommand", "readTaxonomyFile");
388                 exit(1);
389         }
390 }
391 //**********************************************************************************************************************
392 vector<string> ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* thisList, int& size, string& conTax) {
393         try{
394                 conTax = "";
395                 vector<string> names;
396                 vector<string> allNames;
397                 map<string, string>::iterator it;
398                 map<string, string>::iterator it2;
399
400                 //parse names into vector
401                 string binnames = thisList->get(bin);
402                 m->splitAtComma(binnames, names);
403
404                 //create a tree containing sequences from this bin
405                 PhyloTree* phylo = new PhyloTree();
406                 
407                 size = 0;
408                 for (int i = 0; i < names.size(); i++) {
409         
410                         //if namesfile include the names
411                         if (namefile != "") {
412         
413                                 //is this sequence in the name file - namemap maps seqName -> repSeqName
414                                 it2 = nameMap.find(names[i]);
415                                 
416                                 if (it2 == nameMap.end()) { //this name is not in name file, skip it
417                                         m->mothurOut(names[i] + " is not in your name file.  I will not include it in the consensus."); m->mothurOutEndLine();
418                                 }else{
419                                         
420                                         //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
421                                         it = taxMap.find(it2->second);
422                         
423                                         if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
424                                         
425                                                 if (names[i] != it2->second) { m->mothurOut(names[i] + " is represented by " +  it2->second + " and is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
426                                                 else {  m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
427                                         }else{
428                                 
429                                                 //add seq to tree
430                                                 phylo->addSeqToTree(names[i], it->second);
431                                                 size++;
432                                                 allNames.push_back(names[i]);
433                                         }
434                                 }
435                                 
436                         }else{
437                                 //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
438                                 it = taxMap.find(names[i]);
439                 
440                                 if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
441                                         m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine();
442                                 }else{
443                                         //add seq to tree
444                                         phylo->addSeqToTree(names[i], it->second);
445                                         size++;
446                                         allNames.push_back(names[i]);
447                                 }
448                         }
449
450                         
451                         if (m->control_pressed) { delete phylo; return allNames; }
452                         
453                 }
454                 
455                 //build tree
456                 phylo->assignHeirarchyIDs(0);
457                 
458                 TaxNode currentNode = phylo->get(0);
459                 int myLevel = 0;        
460                 //at each level
461                 while (currentNode.children.size() != 0) { //you still have more to explore
462                 
463                         TaxNode bestChild;
464                         int bestChildSize = 0;
465                         
466                         //go through children
467                         for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
468                                 
469                                 TaxNode temp = phylo->get(itChild->second);
470                                 
471                                 //select child with largest accesions - most seqs assigned to it
472                                 if (temp.accessions.size() > bestChildSize) {
473                                         bestChild = phylo->get(itChild->second);
474                                         bestChildSize = temp.accessions.size();
475                                 }
476                                 
477                         }
478                                 
479                         //is this taxonomy above cutoff
480                         int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
481                         
482                         if (consensusConfidence >= cutoff) { //if yes, add it
483                                 if (probs) {
484                                         conTax += bestChild.name + "(" + toString(consensusConfidence) + ");";
485                                 }else{
486                                         conTax += bestChild.name + ";";
487                                 }
488                                 myLevel++;
489                         }else{ //if no, quit
490                                 break;
491                         }
492                         
493                         //move down a level
494                         currentNode = bestChild;
495                 }
496                 
497                 if (myLevel != phylo->getMaxLevel()) {
498                         while (myLevel != phylo->getMaxLevel()) {
499                                 conTax += "unclassified;";
500                                 myLevel++;
501                         }
502                 }               
503                 if (conTax == "") {  conTax = "no_consensus;";  }
504                 
505                 delete phylo;   
506                 
507                 return allNames;
508                         
509         }
510         catch(exception& e) {
511                 m->errorOut(e, "ClassifyOtuCommand", "findConsensusTaxonomy");
512                 exit(1);
513         }
514 }
515
516 //**********************************************************************************************************************
517 int ClassifyOtuCommand::process(ListVector* processList) {
518         try{
519                 string conTax;
520                 int size;
521                 
522                 //create output file
523                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
524                                 
525                 ofstream out;
526                 string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".cons.taxonomy";
527                 m->openOutputFile(outputFile, out);
528                 outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
529                 
530                 ofstream outSum;
531                 string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".cons.tax.summary";
532                 m->openOutputFile(outputSumFile, outSum);
533                 outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile);
534                 
535                 out << "OTU\tSize\tTaxonomy" << endl;
536                 
537                 PhyloSummary* taxaSum;
538                 if (refTaxonomy != "") {
539                         taxaSum = new PhyloSummary(refTaxonomy, groupfile);
540                 }else {
541                         taxaSum = new PhyloSummary(groupfile);
542                 }
543                 
544                 //for each bin in the list vector
545                 for (int i = 0; i < processList->getNumBins(); i++) {
546                         
547                         if (m->control_pressed) { break; }
548                         
549                         vector<string> names;
550                         names = findConsensusTaxonomy(i, processList, size, conTax);
551                 
552                         if (m->control_pressed) { out.close();  return 0; }
553                         
554                         //output to new names file
555                         out << (i+1) << '\t' << size << '\t' << conTax << endl;
556                         
557                         string noConfidenceConTax = conTax;
558                         m->removeConfidences(noConfidenceConTax);
559                         
560                         //add this bins taxonomy to summary
561                         if (basis == "sequence") {
562                                 for(int j = 0; j < names.size(); j++) {  taxaSum->addSeqToTree(names[j], noConfidenceConTax);  }
563                         }else { //otu
564                                 taxaSum->addSeqToTree(noConfidenceConTax, names);
565                         }
566                 }
567
568                 out.close();
569                 
570                 //print summary file
571                 taxaSum->print(outSum);
572                 outSum.close();
573                 
574                 delete taxaSum;
575                 
576                 return 0;
577
578         }
579         catch(exception& e) {
580                 m->errorOut(e, "ClassifyOtuCommand", "process");
581                 exit(1);
582         }
583 }
584 /**************************************************************************************************/
585 string ClassifyOtuCommand::addUnclassifieds(string tax, int maxlevel) {
586         try{
587                 string newTax, taxon;
588                 int level = 0;
589                 
590                 //keep what you have counting the levels
591                 while (tax.find_first_of(';') != -1) {
592                         //get taxon
593                         taxon = tax.substr(0,tax.find_first_of(';'))+';';
594                         tax = tax.substr(tax.find_first_of(';')+1, tax.length());
595                         newTax += taxon;
596                         level++;
597                 }
598                 
599                 //add "unclassified" until you reach maxLevel
600                 while (level < maxlevel) {
601                         newTax += "unclassified;";
602                         level++;
603                 }
604                 
605                 return newTax;
606         }
607         catch(exception& e) {
608                 m->errorOut(e, "ClassifyOtuCommand", "addUnclassifieds");
609                 exit(1);
610         }
611 }
612 //**********************************************************************************************************************
613
614