]> git.donarmstrong.com Git - mothur.git/blob - classifyotucommand.cpp
working on current change
[mothur.git] / classifyotucommand.cpp
1 /*
2  *  classifyotucommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 6/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "classifyotucommand.h"
11 #include "phylotree.h"
12 #include "phylosummary.h"
13
14 //**********************************************************************************************************************
15 vector<string> ClassifyOtuCommand::setParameters(){     
16         try {
17                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
18                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy);
19                 CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(preftaxonomy);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
22                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
23                 CommandParameter pbasis("basis", "Multiple", "otu-sequence", "otu", "", "", "",false,false); parameters.push_back(pbasis);
24                 CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "",false,true); parameters.push_back(pcutoff);
25                 CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pprobs);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "ClassifyOtuCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string ClassifyOtuCommand::getHelpString(){     
40         try {
41                 string helpString = "";
42                 helpString += "The classify.otu command parameters are list, taxonomy, reftaxonomy, name, group, cutoff, label, basis and probs.  The taxonomy and list parameters are required unless you have a valid current file.\n";
43                 helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. Providing it will keep the rankIDs in the summary file static.\n";
44                 helpString += "The name parameter allows you add a names file with your taxonomy file.\n";
45                 helpString += "The group parameter allows you provide a group file to use in creating the summary file breakdown.\n";
46                 helpString += "The basis parameter allows you indicate what you want the summary file to represent, options are otu and sequence. Default is otu.\n";
47                 helpString += "For example consider the following basis=sequence could give Clostridiales       3       105     16      43      46, where 105 is the total number of sequences whose otu classified to Clostridiales.\n";
48                 helpString += "16 is the number of sequences in the otus from groupA, 43 is the number of sequences in the otus from groupB, and 46 is the number of sequences in the otus from groupC.\n";
49                 helpString += "Now for basis=otu could give Clostridiales       3       7       6       1       2, where 7 is the number of otus that classified to Clostridiales.\n";
50                 helpString += "6 is the number of otus containing sequences from groupA, 1 is the number of otus containing sequences from groupB, and 2 is the number of otus containing sequences from groupC.\n";
51                 helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n";
52                 helpString += "The default value for label is all labels in your inputfile.\n";
53                 helpString += "The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy.  The default is 51, meaning 51%. Cutoff cannot be below 51.\n";
54                 helpString += "The probs parameter shuts off the outputting of the consensus confidence results. The default is true, meaning you want the confidence to be shown.\n";
55                 helpString += "The classify.otu command should be in the following format: classify.otu(taxonomy=yourTaxonomyFile, list=yourListFile, name=yourNamesFile, label=yourLabels).\n";
56                 helpString += "Example classify.otu(taxonomy=abrecovery.silva.full.taxonomy, list=abrecovery.fn.list, label=0.10).\n";
57                 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n";
58                 return helpString;
59         }
60         catch(exception& e) {
61                 m->errorOut(e, "ClassifyOtuCommand", "getHelpString");
62                 exit(1);
63         }
64 }
65 //**********************************************************************************************************************
66 ClassifyOtuCommand::ClassifyOtuCommand(){       
67         try {
68                 abort = true; calledHelp = true; 
69                 setParameters();
70                 vector<string> tempOutNames;
71                 outputTypes["constaxonomy"] = tempOutNames;
72                 outputTypes["taxsummary"] = tempOutNames;
73         }
74         catch(exception& e) {
75                 m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
76                 exit(1);
77         }
78 }
79
80 //**********************************************************************************************************************
81 ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
82         try{
83                 abort = false; calledHelp = false;   
84                 allLines = 1;
85                 labels.clear();
86                                 
87                 //allow user to run help
88                 if (option == "help") { 
89                         help(); abort = true; calledHelp = true;
90                 }else if(option == "citation") { citation(); abort = true; calledHelp = true;} 
91                 else {
92                         vector<string> myArray = setParameters();
93                         
94                         OptionParser parser(option);
95                         map<string, string> parameters = parser.getParameters();
96                         
97                         ValidParameters validParameter;
98                         map<string, string>::iterator it;
99                 
100                         //check to make sure all parameters are valid for command
101                         for (it = parameters.begin(); it != parameters.end(); it++) { 
102                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
103                         }
104                         
105                         //initialize outputTypes
106                         vector<string> tempOutNames;
107                         outputTypes["constaxonomy"] = tempOutNames;
108                         outputTypes["taxsummary"] = tempOutNames;
109                 
110                         //if the user changes the input directory command factory will send this info to us in the output parameter 
111                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
112                         if (inputDir == "not found"){   inputDir = "";          }
113                         else {
114                                 string path;
115                                 it = parameters.find("list");
116                                 //user has given a template file
117                                 if(it != parameters.end()){ 
118                                         path = m->hasPath(it->second);
119                                         //if the user has not given a path then, add inputdir. else leave path alone.
120                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
121                                 }
122                                 
123                                 it = parameters.find("name");
124                                 //user has given a template file
125                                 if(it != parameters.end()){ 
126                                         path = m->hasPath(it->second);
127                                         //if the user has not given a path then, add inputdir. else leave path alone.
128                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
129                                 }
130                                 
131                                 it = parameters.find("taxonomy");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
137                                 }
138                                 
139                                 it = parameters.find("reftaxonomy");
140                                 //user has given a template file
141                                 if(it != parameters.end()){ 
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["reftaxonomy"] = inputDir + it->second;              }
145                                 }
146                                 
147                                 it = parameters.find("group");
148                                 //user has given a template file
149                                 if(it != parameters.end()){ 
150                                         path = m->hasPath(it->second);
151                                         //if the user has not given a path then, add inputdir. else leave path alone.
152                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
153                                 }
154                         }
155
156                         
157                         //if the user changes the output directory command factory will send this info to us in the output parameter 
158                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
159                         
160                         //check for required parameters
161                         listfile = validParameter.validFile(parameters, "list", true);
162                         if (listfile == "not found") {                          
163                                 //if there is a current list file, use it
164                                 listfile = m->getListFile(); 
165                                 if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
166                                 else {  m->mothurOut("You have no current listfile and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
167                         }
168                         else if (listfile == "not open") { abort = true; }      
169                         else { m->setListFile(listfile); }
170                         
171                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
172                         if (taxfile == "not found") {  //if there is a current list file, use it
173                                 taxfile = m->getTaxonomyFile(); 
174                                 if (taxfile != "") {  m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
175                                 else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
176                         }
177                         else if (taxfile == "not open") { abort = true; }
178                         else { m->setTaxonomyFile(taxfile); }
179                         
180                         refTaxonomy = validParameter.validFile(parameters, "reftaxonomy", true);
181                         if (refTaxonomy == "not found") { refTaxonomy = ""; m->mothurOut("reftaxonomy is not required, but if given will keep the rankIDs in the summary file static."); m->mothurOutEndLine(); }
182                         else if (refTaxonomy == "not open") { abort = true; }
183         
184                         namefile = validParameter.validFile(parameters, "name", true);
185                         if (namefile == "not open") { abort = true; }   
186                         else if (namefile == "not found") { namefile = ""; }
187                         else { m->setNameFile(namefile); }
188                         
189                         groupfile = validParameter.validFile(parameters, "group", true);
190                         if (groupfile == "not open") { abort = true; }  
191                         else if (groupfile == "not found") { groupfile = ""; }
192                         else { m->setGroupFile(groupfile); }
193                         
194                         //check for optional parameter and set defaults
195                         // ...at some point should added some additional type checking...
196                         label = validParameter.validFile(parameters, "label", false);                   
197                         if (label == "not found") { label = ""; allLines = 1;  }
198                         else { 
199                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
200                                 else { allLines = 1;  }
201                         }
202                         
203                         basis = validParameter.validFile(parameters, "basis", false);
204                         if (basis == "not found") { basis = "otu"; }    
205                         
206                         if ((basis != "otu") && (basis != "sequence")) { m->mothurOut("Invalid option for basis. basis options are otu and sequence, using otu."); m->mothurOutEndLine(); }
207                         
208                         string temp = validParameter.validFile(parameters, "cutoff", false);                    if (temp == "not found") { temp = "51"; }
209                         convert(temp, cutoff); 
210                         
211                         temp = validParameter.validFile(parameters, "probs", false);                                    if (temp == "not found"){       temp = "true";                  }
212                         probs = m->isTrue(temp);
213                         
214                         
215                         if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true;  }
216                         
217                 }
218         }
219         catch(exception& e) {
220                 m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
221                 exit(1);
222         }
223 }
224 //**********************************************************************************************************************
225
226 int ClassifyOtuCommand::execute(){
227         try {
228         
229                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
230                 
231                 //if user gave a namesfile then use it
232                 if (namefile != "") {   readNamesFile();        }
233                 
234                 //read taxonomy file and save in map for easy access in building bin trees
235                 readTaxonomyFile();
236                 
237                 if (m->control_pressed) { return 0; }
238                 
239                 input = new InputData(listfile, "list");
240                 list = input->getListVector();
241                 string lastLabel = list->getLabel();
242
243                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
244                 set<string> processedLabels;
245                 set<string> userLabels = labels;
246                 
247                 if (m->control_pressed) { outputTypes.clear(); delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  }  return 0; }
248         
249                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
250                         
251                         if (allLines == 1 || labels.count(list->getLabel()) == 1){
252                         
253                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
254                                         process(list);
255                                         if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str());  } delete input; delete list; return 0; }
256                                                                                 
257                                         processedLabels.insert(list->getLabel());
258                                         userLabels.erase(list->getLabel());
259                         }
260                         
261                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
262                                         string saveLabel = list->getLabel();
263                                         
264                                         delete list;
265                                         list = input->getListVector(lastLabel);
266                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
267                                         process(list);
268                                 
269                                         
270                                         if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } delete input; delete list; return 0; }
271                                                                                 
272                                         processedLabels.insert(list->getLabel());
273                                         userLabels.erase(list->getLabel());
274                                         
275                                         //restore real lastlabel to save below
276                                         list->setLabel(saveLabel);
277                         }
278                         
279                         lastLabel = list->getLabel();
280         
281                         delete list;
282                         list = input->getListVector();
283                 }
284                 
285                 //output error messages about any remaining user labels
286                 bool needToRun = false;
287                 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {  
288                         m->mothurOut("Your file does not include the label " + (*it)); 
289                         if (processedLabels.count(lastLabel) != 1) {
290                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
291                                 needToRun = true;
292                         }else {
293                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
294                         }
295                 }
296                 
297                 //run last label if you need to
298                 if (needToRun == true)  {
299                         if (list != NULL) {     delete list;    }
300                         list = input->getListVector(lastLabel);
301                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
302                         
303                         process(list);
304                         delete list;
305                         
306                         if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } delete input; delete list; return 0; }
307                 }
308                 
309                 delete input;  
310                                 
311                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str());  } return 0; }
312                 
313                 m->mothurOutEndLine();
314                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
315                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
316                 m->mothurOutEndLine();
317                 
318                 return 0;
319         }
320         catch(exception& e) {
321                 m->errorOut(e, "ClassifyOtuCommand", "execute");
322                 exit(1);
323         }
324 }
325
326 //**********************************************************************************************************************
327 int ClassifyOtuCommand::readNamesFile() {
328         try {
329                 
330                 ifstream inNames;
331                 m->openInputFile(namefile, inNames);
332                 
333                 string name, names;
334         
335                 while(!inNames.eof()){
336                         inNames >> name;                        //read from first column  A
337                         inNames >> names;               //read from second column  A,B,C,D
338                         m->gobble(inNames);
339                         
340                         //parse names into vector
341                         vector<string> theseNames;
342                         m->splitAtComma(names, theseNames);
343
344                         for (int i = 0; i < theseNames.size(); i++) {  nameMap[theseNames[i]] = name;  }
345                         
346                         if (m->control_pressed) { inNames.close(); nameMap.clear(); return 0; }
347                 }
348                 inNames.close();
349                 
350                 return 0;
351         }
352         catch(exception& e) {
353                 m->errorOut(e, "ClassifyOtuCommand", "readNamesFile");
354                 exit(1);
355         }
356 }
357 //**********************************************************************************************************************
358 int ClassifyOtuCommand::readTaxonomyFile() {
359         try {
360                 
361                 ifstream in;
362                 m->openInputFile(taxfile, in);
363                 
364                 string name, tax;
365         
366                 while(!in.eof()){
367                         in >> name >> tax;              
368                         m->gobble(in);
369                         
370                         //are there confidence scores, if so remove them
371                         if (tax.find_first_of('(') != -1) {  removeConfidences(tax);    }
372                         
373                         taxMap[name] = tax;
374                         
375                         if (m->control_pressed) { in.close(); taxMap.clear(); return 0; }
376                 }
377                 in.close();
378                 
379                 return 0;
380         }
381         catch(exception& e) {
382                 m->errorOut(e, "ClassifyOtuCommand", "readTaxonomyFile");
383                 exit(1);
384         }
385 }
386 //**********************************************************************************************************************
387 vector<string> ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* thisList, int& size, string& conTax) {
388         try{
389                 conTax = "";
390                 vector<string> names;
391                 vector<string> allNames;
392                 map<string, string>::iterator it;
393                 map<string, string>::iterator it2;
394
395                 //parse names into vector
396                 string binnames = thisList->get(bin);
397                 m->splitAtComma(binnames, names);
398
399                 //create a tree containing sequences from this bin
400                 PhyloTree* phylo = new PhyloTree();
401                 
402                 size = 0;
403                 for (int i = 0; i < names.size(); i++) {
404         
405                         //if namesfile include the names
406                         if (namefile != "") {
407         
408                                 //is this sequence in the name file - namemap maps seqName -> repSeqName
409                                 it2 = nameMap.find(names[i]);
410                                 
411                                 if (it2 == nameMap.end()) { //this name is not in name file, skip it
412                                         m->mothurOut(names[i] + " is not in your name file.  I will not include it in the consensus."); m->mothurOutEndLine();
413                                 }else{
414                                         
415                                         //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
416                                         it = taxMap.find(it2->second);
417                         
418                                         if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
419                                         
420                                                 if (names[i] != it2->second) { m->mothurOut(names[i] + " is represented by " +  it2->second + " and is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
421                                                 else {  m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
422                                         }else{
423                                 
424                                                 //add seq to tree
425                                                 phylo->addSeqToTree(names[i], it->second);
426                                                 size++;
427                                                 allNames.push_back(names[i]);
428                                         }
429                                 }
430                                 
431                         }else{
432                                 //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
433                                 it = taxMap.find(names[i]);
434                 
435                                 if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
436                                         m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine();
437                                 }else{
438                                         //add seq to tree
439                                         phylo->addSeqToTree(names[i], it->second);
440                                         size++;
441                                         allNames.push_back(names[i]);
442                                 }
443                         }
444
445                         
446                         if (m->control_pressed) { delete phylo; return allNames; }
447                         
448                 }
449                 
450                 //build tree
451                 phylo->assignHeirarchyIDs(0);
452                 
453                 TaxNode currentNode = phylo->get(0);
454                 
455                 //at each level
456                 while (currentNode.children.size() != 0) { //you still have more to explore
457                 
458                         TaxNode bestChild;
459                         int bestChildSize = 0;
460                         
461                         //go through children
462                         for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
463                                 
464                                 TaxNode temp = phylo->get(itChild->second);
465                                 
466                                 //select child with largest accesions - most seqs assigned to it
467                                 if (temp.accessions.size() > bestChildSize) {
468                                         bestChild = phylo->get(itChild->second);
469                                         bestChildSize = temp.accessions.size();
470                                 }
471                                 
472                         }
473                                 
474                         //is this taxonomy above cutoff
475                         int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
476                         
477                         if (consensusConfidence >= cutoff) { //if yes, add it
478                                 if (probs) {
479                                         conTax += bestChild.name + "(" + toString(consensusConfidence) + ");";
480                                 }else{
481                                         conTax += bestChild.name + ";";
482                                 }
483                         }else{ //if no, quit
484                                 break;
485                         }
486                         
487                         //move down a level
488                         currentNode = bestChild;
489                 }
490                 
491                                 
492                 if (conTax == "") {  conTax = "no_consensus;";  }
493                 
494                 delete phylo;   
495                 
496                 return allNames;
497                         
498         }
499         catch(exception& e) {
500                 m->errorOut(e, "ClassifyOtuCommand", "findConsensusTaxonomy");
501                 exit(1);
502         }
503 }
504
505 //**********************************************************************************************************************
506 int ClassifyOtuCommand::process(ListVector* processList) {
507         try{
508                 string conTax;
509                 int size;
510                 
511                 //create output file
512                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
513                                 
514                 ofstream out;
515                 string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".cons.taxonomy";
516                 m->openOutputFile(outputFile, out);
517                 outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
518                 
519                 ofstream outSum;
520                 string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".cons.tax.summary";
521                 m->openOutputFile(outputSumFile, outSum);
522                 outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile);
523                 
524                 out << "OTU\tSize\tTaxonomy" << endl;
525                 
526                 PhyloSummary* taxaSum;
527                 if (refTaxonomy != "") {
528                         taxaSum = new PhyloSummary(refTaxonomy, groupfile);
529                 }else {
530                         taxaSum = new PhyloSummary(groupfile);
531                 }
532                 
533                 //for each bin in the list vector
534                 for (int i = 0; i < processList->getNumBins(); i++) {
535                         
536                         if (m->control_pressed) { break; }
537                         
538                         vector<string> names;
539                         names = findConsensusTaxonomy(i, processList, size, conTax);
540                 
541                         if (m->control_pressed) { out.close();  return 0; }
542                         
543                         //output to new names file
544                         out << (i+1) << '\t' << size << '\t' << conTax << endl;
545                         
546                         string noConfidenceConTax = conTax;
547                         removeConfidences(noConfidenceConTax);
548                         
549                         //add this bins taxonomy to summary
550                         if (basis == "sequence") {
551                                 for(int j = 0; j < names.size(); j++) {  taxaSum->addSeqToTree(names[j], noConfidenceConTax);  }
552                         }else { //otu
553                                 taxaSum->addSeqToTree(noConfidenceConTax, names);
554                         }
555                 }
556
557                 out.close();
558                 
559                 //print summary file
560                 taxaSum->print(outSum);
561                 outSum.close();
562                 
563                 delete taxaSum;
564                 
565                 return 0;
566
567         }
568         catch(exception& e) {
569                 m->errorOut(e, "ClassifyOtuCommand", "process");
570                 exit(1);
571         }
572 }
573 /**************************************************************************************************/
574 void ClassifyOtuCommand::removeConfidences(string& tax) {
575         try {
576                 
577                 string taxon;
578                 string newTax = "";
579                 
580                 while (tax.find_first_of(';') != -1) {
581                         //get taxon
582                         taxon = tax.substr(0,tax.find_first_of(';'));
583                         
584                         int pos = taxon.find_first_of('(');
585                         if (pos != -1) {
586                                 taxon = taxon.substr(0, pos); //rip off confidence 
587                         }
588                         
589                         taxon += ";";
590                         
591                         tax = tax.substr(tax.find_first_of(';')+1, tax.length());
592                         newTax += taxon;
593                 }
594                 
595                 tax = newTax;
596         }
597         catch(exception& e) {
598                 m->errorOut(e, "ClassifyOtuCommand", "removeConfidences");
599                 exit(1);
600         }
601 }
602 //**********************************************************************************************************************
603
604