]> git.donarmstrong.com Git - mothur.git/blob - classifyotucommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / classifyotucommand.cpp
1 /*
2  *  classifyotucommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 6/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "classifyotucommand.h"
11 #include "phylotree.h"
12 #include "phylosummary.h"
13
14 //**********************************************************************************************************************
15 vector<string> ClassifyOtuCommand::setParameters(){     
16         try {
17                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
18                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy);
19                 CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(preftaxonomy);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
22                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
23                 CommandParameter pbasis("basis", "Multiple", "otu-sequence", "otu", "", "", "",false,false); parameters.push_back(pbasis);
24                 CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "",false,true); parameters.push_back(pcutoff);
25                 CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pprobs);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "ClassifyOtuCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string ClassifyOtuCommand::getHelpString(){     
40         try {
41                 string helpString = "";
42                 helpString += "The classify.otu command parameters are list, taxonomy, reftaxonomy, name, group, cutoff, label, basis and probs.  The taxonomy and list parameters are required unless you have a valid current file.\n";
43                 helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. Providing it will keep the rankIDs in the summary file static.\n";
44                 helpString += "The name parameter allows you add a names file with your taxonomy file.\n";
45                 helpString += "The group parameter allows you provide a group file to use in creating the summary file breakdown.\n";
46                 helpString += "The basis parameter allows you indicate what you want the summary file to represent, options are otu and sequence. Default is otu.\n";
47                 helpString += "For example consider the following basis=sequence could give Clostridiales       3       105     16      43      46, where 105 is the total number of sequences whose otu classified to Clostridiales.\n";
48                 helpString += "16 is the number of sequences in the otus from groupA, 43 is the number of sequences in the otus from groupB, and 46 is the number of sequences in the otus from groupC.\n";
49                 helpString += "Now for basis=otu could give Clostridiales       3       7       6       1       2, where 7 is the number of otus that classified to Clostridiales.\n";
50                 helpString += "6 is the number of otus containing sequences from groupA, 1 is the number of otus containing sequences from groupB, and 2 is the number of otus containing sequences from groupC.\n";
51                 helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n";
52                 helpString += "The default value for label is all labels in your inputfile.\n";
53                 helpString += "The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy.  The default is 51, meaning 51%. Cutoff cannot be below 51.\n";
54                 helpString += "The probs parameter shuts off the outputting of the consensus confidence results. The default is true, meaning you want the confidence to be shown.\n";
55                 helpString += "The classify.otu command should be in the following format: classify.otu(taxonomy=yourTaxonomyFile, list=yourListFile, name=yourNamesFile, label=yourLabels).\n";
56                 helpString += "Example classify.otu(taxonomy=abrecovery.silva.full.taxonomy, list=abrecovery.fn.list, label=0.10).\n";
57                 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n";
58                 return helpString;
59         }
60         catch(exception& e) {
61                 m->errorOut(e, "ClassifyOtuCommand", "getHelpString");
62                 exit(1);
63         }
64 }
65 //**********************************************************************************************************************
66 ClassifyOtuCommand::ClassifyOtuCommand(){       
67         try {
68                 abort = true; calledHelp = true; 
69                 setParameters();
70                 vector<string> tempOutNames;
71                 outputTypes["constaxonomy"] = tempOutNames;
72                 outputTypes["taxsummary"] = tempOutNames;
73         }
74         catch(exception& e) {
75                 m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
76                 exit(1);
77         }
78 }
79
80 //**********************************************************************************************************************
81 ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
82         try{
83                 abort = false; calledHelp = false;   
84                 allLines = 1;
85                 labels.clear();
86                                 
87                 //allow user to run help
88                 if (option == "help") { 
89                         help(); abort = true; calledHelp = true;
90                 }else if(option == "citation") { citation(); abort = true; calledHelp = true;} 
91                 else {
92                         vector<string> myArray = setParameters();
93                         
94                         OptionParser parser(option);
95                         map<string, string> parameters = parser.getParameters();
96                         
97                         ValidParameters validParameter;
98                         map<string, string>::iterator it;
99                 
100                         //check to make sure all parameters are valid for command
101                         for (it = parameters.begin(); it != parameters.end(); it++) { 
102                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
103                         }
104                         
105                         //initialize outputTypes
106                         vector<string> tempOutNames;
107                         outputTypes["constaxonomy"] = tempOutNames;
108                         outputTypes["taxsummary"] = tempOutNames;
109                 
110                         //if the user changes the input directory command factory will send this info to us in the output parameter 
111                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
112                         if (inputDir == "not found"){   inputDir = "";          }
113                         else {
114                                 string path;
115                                 it = parameters.find("list");
116                                 //user has given a template file
117                                 if(it != parameters.end()){ 
118                                         path = m->hasPath(it->second);
119                                         //if the user has not given a path then, add inputdir. else leave path alone.
120                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
121                                 }
122                                 
123                                 it = parameters.find("name");
124                                 //user has given a template file
125                                 if(it != parameters.end()){ 
126                                         path = m->hasPath(it->second);
127                                         //if the user has not given a path then, add inputdir. else leave path alone.
128                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
129                                 }
130                                 
131                                 it = parameters.find("taxonomy");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
137                                 }
138                                 
139                                 it = parameters.find("reftaxonomy");
140                                 //user has given a template file
141                                 if(it != parameters.end()){ 
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["reftaxonomy"] = inputDir + it->second;              }
145                                 }
146                                 
147                                 it = parameters.find("group");
148                                 //user has given a template file
149                                 if(it != parameters.end()){ 
150                                         path = m->hasPath(it->second);
151                                         //if the user has not given a path then, add inputdir. else leave path alone.
152                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
153                                 }
154                         }
155
156                         
157                         //if the user changes the output directory command factory will send this info to us in the output parameter 
158                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
159                         
160                         //check for required parameters
161                         listfile = validParameter.validFile(parameters, "list", true);
162                         if (listfile == "not found") {                          
163                                 //if there is a current list file, use it
164                                 listfile = m->getListFile(); 
165                                 if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
166                                 else {  m->mothurOut("You have no current listfile and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
167                         }
168                         else if (listfile == "not open") { abort = true; }      
169                         else { m->setListFile(listfile); }
170                         
171                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
172                         if (taxfile == "not found") {  //if there is a current list file, use it
173                                 taxfile = m->getTaxonomyFile(); 
174                                 if (taxfile != "") {  m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
175                                 else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
176                         }
177                         else if (taxfile == "not open") { abort = true; }
178                         else { m->setTaxonomyFile(taxfile); }
179                         
180                         refTaxonomy = validParameter.validFile(parameters, "reftaxonomy", true);
181                         if (refTaxonomy == "not found") { refTaxonomy = ""; m->mothurOut("reftaxonomy is not required, but if given will keep the rankIDs in the summary file static."); m->mothurOutEndLine(); }
182                         else if (refTaxonomy == "not open") { abort = true; }
183         
184                         namefile = validParameter.validFile(parameters, "name", true);
185                         if (namefile == "not open") { namefile = ""; abort = true; }    
186                         else if (namefile == "not found") { namefile = ""; }
187                         else { m->setNameFile(namefile); }
188                         
189                         groupfile = validParameter.validFile(parameters, "group", true);
190                         if (groupfile == "not open") { abort = true; }  
191                         else if (groupfile == "not found") { groupfile = ""; }
192                         else { m->setGroupFile(groupfile); }
193                         
194                         //check for optional parameter and set defaults
195                         // ...at some point should added some additional type checking...
196                         label = validParameter.validFile(parameters, "label", false);                   
197                         if (label == "not found") { label = ""; allLines = 1;  }
198                         else { 
199                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
200                                 else { allLines = 1;  }
201                         }
202                         
203                         basis = validParameter.validFile(parameters, "basis", false);
204                         if (basis == "not found") { basis = "otu"; }    
205                         
206                         if ((basis != "otu") && (basis != "sequence")) { m->mothurOut("Invalid option for basis. basis options are otu and sequence, using otu."); m->mothurOutEndLine(); }
207                         
208                         string temp = validParameter.validFile(parameters, "cutoff", false);                    if (temp == "not found") { temp = "51"; }
209                         m->mothurConvert(temp, cutoff); 
210                         
211                         temp = validParameter.validFile(parameters, "probs", false);                                    if (temp == "not found"){       temp = "true";                  }
212                         probs = m->isTrue(temp);
213                         
214                         
215                         if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true;  }
216                         
217                         if (namefile == ""){
218                                 vector<string> files; files.push_back(taxfile);
219                                 parser.getNameFile(files);
220                         }
221                         
222                 }
223         }
224         catch(exception& e) {
225                 m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
226                 exit(1);
227         }
228 }
229 //**********************************************************************************************************************
230
231 int ClassifyOtuCommand::execute(){
232         try {
233         
234                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
235                 
236                 //if user gave a namesfile then use it
237                 if (namefile != "") {   readNamesFile();        }
238                 
239                 //read taxonomy file and save in map for easy access in building bin trees
240                 readTaxonomyFile();
241                 
242                 if (m->control_pressed) { return 0; }
243                 
244                 input = new InputData(listfile, "list");
245                 list = input->getListVector();
246                 string lastLabel = list->getLabel();
247
248                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
249                 set<string> processedLabels;
250                 set<string> userLabels = labels;
251                 
252                 if (m->control_pressed) { outputTypes.clear(); delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }  return 0; }
253         
254                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
255                         
256                         if (allLines == 1 || labels.count(list->getLabel()) == 1){
257                         
258                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
259                                         process(list);
260                                         if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
261                                                                                 
262                                         processedLabels.insert(list->getLabel());
263                                         userLabels.erase(list->getLabel());
264                         }
265                         
266                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
267                                         string saveLabel = list->getLabel();
268                                         
269                                         delete list;
270                                         list = input->getListVector(lastLabel);
271                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
272                                         process(list);
273                                 
274                                         
275                                         if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
276                                                                                 
277                                         processedLabels.insert(list->getLabel());
278                                         userLabels.erase(list->getLabel());
279                                         
280                                         //restore real lastlabel to save below
281                                         list->setLabel(saveLabel);
282                         }
283                         
284                         lastLabel = list->getLabel();
285         
286                         delete list;
287                         list = input->getListVector();
288                 }
289                 
290                 //output error messages about any remaining user labels
291                 bool needToRun = false;
292                 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {  
293                         m->mothurOut("Your file does not include the label " + (*it)); 
294                         if (processedLabels.count(lastLabel) != 1) {
295                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
296                                 needToRun = true;
297                         }else {
298                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
299                         }
300                 }
301                 
302                 //run last label if you need to
303                 if (needToRun == true)  {
304                         if (list != NULL) {     delete list;    }
305                         list = input->getListVector(lastLabel);
306                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
307                         
308                         process(list);
309                         delete list;
310                         
311                         if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
312                 }
313                 
314                 delete input;  
315                                 
316                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
317                 
318                 m->mothurOutEndLine();
319                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
320                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
321                 m->mothurOutEndLine();
322                 
323                 return 0;
324         }
325         catch(exception& e) {
326                 m->errorOut(e, "ClassifyOtuCommand", "execute");
327                 exit(1);
328         }
329 }
330
331 //**********************************************************************************************************************
332 int ClassifyOtuCommand::readNamesFile() {
333         try {
334                 
335                 ifstream inNames;
336                 m->openInputFile(namefile, inNames);
337                 
338                 string name, names;
339         
340                 while(!inNames.eof()){
341                         inNames >> name;                        //read from first column  A
342                         inNames >> names;               //read from second column  A,B,C,D
343                         m->gobble(inNames);
344                         
345                         //parse names into vector
346                         vector<string> theseNames;
347                         m->splitAtComma(names, theseNames);
348
349                         for (int i = 0; i < theseNames.size(); i++) {  nameMap[theseNames[i]] = name;  }
350                         
351                         if (m->control_pressed) { inNames.close(); nameMap.clear(); return 0; }
352                 }
353                 inNames.close();
354                 
355                 return 0;
356         }
357         catch(exception& e) {
358                 m->errorOut(e, "ClassifyOtuCommand", "readNamesFile");
359                 exit(1);
360         }
361 }
362 //**********************************************************************************************************************
363 int ClassifyOtuCommand::readTaxonomyFile() {
364         try {
365                 
366                 ifstream in;
367                 m->openInputFile(taxfile, in);
368                 
369                 string name, tax;
370         
371                 while(!in.eof()){
372                         in >> name >> tax;              
373                         m->gobble(in);
374                         
375                         //are there confidence scores, if so remove them
376                         if (tax.find_first_of('(') != -1) {  m->removeConfidences(tax); }
377                         
378                         taxMap[name] = tax;
379                         
380                         if (m->control_pressed) { in.close(); taxMap.clear(); return 0; }
381                 }
382                 in.close();
383                 
384                 return 0;
385         }
386         catch(exception& e) {
387                 m->errorOut(e, "ClassifyOtuCommand", "readTaxonomyFile");
388                 exit(1);
389         }
390 }
391 //**********************************************************************************************************************
392 vector<string> ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* thisList, int& size, string& conTax) {
393         try{
394                 conTax = "";
395                 vector<string> names;
396                 vector<string> allNames;
397                 map<string, string>::iterator it;
398                 map<string, string>::iterator it2;
399
400                 //parse names into vector
401                 string binnames = thisList->get(bin);
402                 m->splitAtComma(binnames, names);
403
404                 //create a tree containing sequences from this bin
405                 PhyloTree* phylo = new PhyloTree();
406                 
407                 size = 0;
408                 for (int i = 0; i < names.size(); i++) {
409         
410                         //if namesfile include the names
411                         if (namefile != "") {
412         
413                                 //is this sequence in the name file - namemap maps seqName -> repSeqName
414                                 it2 = nameMap.find(names[i]);
415                                 
416                                 if (it2 == nameMap.end()) { //this name is not in name file, skip it
417                                         m->mothurOut(names[i] + " is not in your name file.  I will not include it in the consensus."); m->mothurOutEndLine();
418                                 }else{
419                                         
420                                         //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
421                                         it = taxMap.find(it2->second);
422                         
423                                         if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
424                                         
425                                                 if (names[i] != it2->second) { m->mothurOut(names[i] + " is represented by " +  it2->second + " and is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
426                                                 else {  m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
427                                         }else{
428                                 
429                                                 //add seq to tree
430                                                 phylo->addSeqToTree(names[i], it->second);
431                                                 size++;
432                                                 allNames.push_back(names[i]);
433                                         }
434                                 }
435                                 
436                         }else{
437                                 //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
438                                 it = taxMap.find(names[i]);
439                 
440                                 if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
441                                         m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine();
442                                 }else{
443                                         //add seq to tree
444                                         phylo->addSeqToTree(names[i], it->second);
445                                         size++;
446                                         allNames.push_back(names[i]);
447                                 }
448                         }
449
450                         
451                         if (m->control_pressed) { delete phylo; return allNames; }
452                         
453                 }
454                 
455                 //build tree
456                 phylo->assignHeirarchyIDs(0);
457                 
458                 TaxNode currentNode = phylo->get(0);
459                 int myLevel = 0;        
460                 //at each level
461                 while (currentNode.children.size() != 0) { //you still have more to explore
462                 
463                         TaxNode bestChild;
464                         int bestChildSize = 0;
465                         
466                         //go through children
467                         for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
468                                 
469                                 TaxNode temp = phylo->get(itChild->second);
470                                 
471                                 //select child with largest accesions - most seqs assigned to it
472                                 if (temp.accessions.size() > bestChildSize) {
473                                         bestChild = phylo->get(itChild->second);
474                                         bestChildSize = temp.accessions.size();
475                                 }
476                                 
477                         }
478             
479             //phylotree adds an extra unknown so we want to remove that
480             if (bestChild.name == "unknown") { bestChildSize--; }
481                                 
482                         //is this taxonomy above cutoff
483                         int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
484                         
485                         if (consensusConfidence >= cutoff) { //if yes, add it
486                                 if (probs) {
487                                         conTax += bestChild.name + "(" + toString(consensusConfidence) + ");";
488                                 }else{
489                                         conTax += bestChild.name + ";";
490                                 }
491                                 myLevel++;
492                         }else{ //if no, quit
493                                 break;
494                         }
495                         
496                         //move down a level
497                         currentNode = bestChild;
498                 }
499                 
500                 if (myLevel != phylo->getMaxLevel()) {
501                         while (myLevel != phylo->getMaxLevel()) {
502                                 conTax += "unclassified;";
503                                 myLevel++;
504                         }
505                 }               
506                 if (conTax == "") {  conTax = "no_consensus;";  }
507                 
508                 delete phylo;   
509                 
510                 return allNames;
511                         
512         }
513         catch(exception& e) {
514                 m->errorOut(e, "ClassifyOtuCommand", "findConsensusTaxonomy");
515                 exit(1);
516         }
517 }
518
519 //**********************************************************************************************************************
520 int ClassifyOtuCommand::process(ListVector* processList) {
521         try{
522                 string conTax;
523                 int size;
524                 
525                 //create output file
526                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
527                                 
528                 ofstream out;
529                 string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".cons.taxonomy";
530                 m->openOutputFile(outputFile, out);
531                 outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
532                 
533                 ofstream outSum;
534                 string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".cons.tax.summary";
535                 m->openOutputFile(outputSumFile, outSum);
536                 outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile);
537                 
538                 out << "OTU\tSize\tTaxonomy" << endl;
539                 
540                 PhyloSummary* taxaSum;
541                 if (refTaxonomy != "") {
542                         taxaSum = new PhyloSummary(refTaxonomy, groupfile);
543                 }else {
544                         taxaSum = new PhyloSummary(groupfile);
545                 }
546                 
547
548                 //for each bin in the list vector
549         string snumBins = toString(processList->getNumBins());
550                 for (int i = 0; i < processList->getNumBins(); i++) {
551                         
552                         if (m->control_pressed) { break; }
553                         
554                         vector<string> names;
555                         names = findConsensusTaxonomy(i, processList, size, conTax);
556                 
557                         if (m->control_pressed) { out.close();  return 0; }
558                         
559                         //output to new names file
560             string binLabel = "Otu";
561             string sbinNumber = toString(i+1);
562             if (sbinNumber.length() < snumBins.length()) { 
563                 int diff = snumBins.length() - sbinNumber.length();
564                 for (int h = 0; h < diff; h++) { binLabel += "0"; }
565             }
566             binLabel += sbinNumber;
567
568                         out << binLabel << '\t' << size << '\t' << conTax << endl;
569                         
570                         string noConfidenceConTax = conTax;
571                         m->removeConfidences(noConfidenceConTax);
572                         
573                         //add this bins taxonomy to summary
574                         if (basis == "sequence") {
575                                 for(int j = 0; j < names.size(); j++) {  taxaSum->addSeqToTree(names[j], noConfidenceConTax);  }
576                         }else { //otu
577                                 taxaSum->addSeqToTree(noConfidenceConTax, names);
578                         }
579                 }
580
581                 out.close();
582                 
583                 //print summary file
584                 taxaSum->print(outSum);
585                 outSum.close();
586                 
587                 delete taxaSum;
588                 
589                 return 0;
590
591         }
592         catch(exception& e) {
593                 m->errorOut(e, "ClassifyOtuCommand", "process");
594                 exit(1);
595         }
596 }
597 /**************************************************************************************************/
598 string ClassifyOtuCommand::addUnclassifieds(string tax, int maxlevel) {
599         try{
600                 string newTax, taxon;
601                 int level = 0;
602                 
603                 //keep what you have counting the levels
604                 while (tax.find_first_of(';') != -1) {
605                         //get taxon
606                         taxon = tax.substr(0,tax.find_first_of(';'))+';';
607                         tax = tax.substr(tax.find_first_of(';')+1, tax.length());
608                         newTax += taxon;
609                         level++;
610                 }
611                 
612                 //add "unclassified" until you reach maxLevel
613                 while (level < maxlevel) {
614                         newTax += "unclassified;";
615                         level++;
616                 }
617                 
618                 return newTax;
619         }
620         catch(exception& e) {
621                 m->errorOut(e, "ClassifyOtuCommand", "addUnclassifieds");
622                 exit(1);
623         }
624 }
625 //**********************************************************************************************************************
626
627