]> git.donarmstrong.com Git - mothur.git/blob - classifyotucommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / classifyotucommand.cpp
1 /*
2  *  classifyotucommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 6/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "classifyotucommand.h"
11 #include "phylotree.h"
12 #include "phylosummary.h"
13
14 //**********************************************************************************************************************
15 vector<string> ClassifyOtuCommand::setParameters(){     
16         try {
17                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
18                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy);
19                 CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(preftaxonomy);
20         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
21         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
22                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
23                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
24                 CommandParameter pbasis("basis", "Multiple", "otu-sequence", "otu", "", "", "",false,false); parameters.push_back(pbasis);
25                 CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "",false,true); parameters.push_back(pcutoff);
26                 CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pprobs);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29                 
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "ClassifyOtuCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string ClassifyOtuCommand::getHelpString(){     
41         try {
42                 string helpString = "";
43                 helpString += "The classify.otu command parameters are list, taxonomy, reftaxonomy, name, group, count, cutoff, label, basis and probs.  The taxonomy and list parameters are required unless you have a valid current file.\n";
44                 helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. Providing it will keep the rankIDs in the summary file static.\n";
45                 helpString += "The name parameter allows you add a names file with your taxonomy file.\n";
46                 helpString += "The group parameter allows you provide a group file to use in creating the summary file breakdown.\n";
47                 helpString += "The count parameter allows you add a count file associated with your list file. When using the count parameter mothur assumes your list file contains only uniques.\n";
48         helpString += "The basis parameter allows you indicate what you want the summary file to represent, options are otu and sequence. Default is otu.\n";
49                 helpString += "For example consider the following basis=sequence could give Clostridiales       3       105     16      43      46, where 105 is the total number of sequences whose otu classified to Clostridiales.\n";
50                 helpString += "16 is the number of sequences in the otus from groupA, 43 is the number of sequences in the otus from groupB, and 46 is the number of sequences in the otus from groupC.\n";
51                 helpString += "Now for basis=otu could give Clostridiales       3       7       6       1       2, where 7 is the number of otus that classified to Clostridiales.\n";
52                 helpString += "6 is the number of otus containing sequences from groupA, 1 is the number of otus containing sequences from groupB, and 2 is the number of otus containing sequences from groupC.\n";
53                 helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n";
54                 helpString += "The default value for label is all labels in your inputfile.\n";
55                 helpString += "The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy.  The default is 51, meaning 51%. Cutoff cannot be below 51.\n";
56                 helpString += "The probs parameter shuts off the outputting of the consensus confidence results. The default is true, meaning you want the confidence to be shown.\n";
57                 helpString += "The classify.otu command should be in the following format: classify.otu(taxonomy=yourTaxonomyFile, list=yourListFile, name=yourNamesFile, label=yourLabels).\n";
58                 helpString += "Example classify.otu(taxonomy=abrecovery.silva.full.taxonomy, list=abrecovery.fn.list, label=0.10).\n";
59                 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n";
60                 return helpString;
61         }
62         catch(exception& e) {
63                 m->errorOut(e, "ClassifyOtuCommand", "getHelpString");
64                 exit(1);
65         }
66 }
67 //**********************************************************************************************************************
68 string ClassifyOtuCommand::getOutputFileNameTag(string type, string inputName=""){      
69         try {
70         string outputFileName = "";
71                 map<string, vector<string> >::iterator it;
72         
73         //is this a type this command creates
74         it = outputTypes.find(type);
75         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
76         else {
77             if (type == "constaxonomy") {  outputFileName =  "cons.taxonomy"; }
78             else if (type == "taxsummary") {  outputFileName =  "cons.tax.summary"; }
79             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
80         }
81         return outputFileName;
82         }
83         catch(exception& e) {
84                 m->errorOut(e, "ClassifyOtuCommand", "getOutputFileNameTag");
85                 exit(1);
86         }
87 }
88 //**********************************************************************************************************************
89 ClassifyOtuCommand::ClassifyOtuCommand(){       
90         try {
91                 abort = true; calledHelp = true; 
92                 setParameters();
93                 vector<string> tempOutNames;
94                 outputTypes["constaxonomy"] = tempOutNames;
95                 outputTypes["taxsummary"] = tempOutNames;
96         }
97         catch(exception& e) {
98                 m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
99                 exit(1);
100         }
101 }
102
103 //**********************************************************************************************************************
104 ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
105         try{
106                 abort = false; calledHelp = false;   
107                 allLines = 1;
108                 labels.clear();
109                                 
110                 //allow user to run help
111                 if (option == "help") { 
112                         help(); abort = true; calledHelp = true;
113                 }else if(option == "citation") { citation(); abort = true; calledHelp = true;} 
114                 else {
115                         vector<string> myArray = setParameters();
116                         
117                         OptionParser parser(option);
118                         map<string, string> parameters = parser.getParameters();
119                         
120                         ValidParameters validParameter;
121                         map<string, string>::iterator it;
122                 
123                         //check to make sure all parameters are valid for command
124                         for (it = parameters.begin(); it != parameters.end(); it++) { 
125                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
126                         }
127                         
128                         //initialize outputTypes
129                         vector<string> tempOutNames;
130                         outputTypes["constaxonomy"] = tempOutNames;
131                         outputTypes["taxsummary"] = tempOutNames;
132                 
133                         //if the user changes the input directory command factory will send this info to us in the output parameter 
134                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
135                         if (inputDir == "not found"){   inputDir = "";          }
136                         else {
137                                 string path;
138                                 it = parameters.find("list");
139                                 //user has given a template file
140                                 if(it != parameters.end()){ 
141                                         path = m->hasPath(it->second);
142                                         //if the user has not given a path then, add inputdir. else leave path alone.
143                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
144                                 }
145                                 
146                                 it = parameters.find("name");
147                                 //user has given a template file
148                                 if(it != parameters.end()){ 
149                                         path = m->hasPath(it->second);
150                                         //if the user has not given a path then, add inputdir. else leave path alone.
151                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
152                                 }
153                                 
154                                 it = parameters.find("taxonomy");
155                                 //user has given a template file
156                                 if(it != parameters.end()){ 
157                                         path = m->hasPath(it->second);
158                                         //if the user has not given a path then, add inputdir. else leave path alone.
159                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
160                                 }
161                                 
162                                 it = parameters.find("reftaxonomy");
163                                 //user has given a template file
164                                 if(it != parameters.end()){ 
165                                         path = m->hasPath(it->second);
166                                         //if the user has not given a path then, add inputdir. else leave path alone.
167                                         if (path == "") {       parameters["reftaxonomy"] = inputDir + it->second;              }
168                                 }
169                                 
170                                 it = parameters.find("group");
171                                 //user has given a template file
172                                 if(it != parameters.end()){ 
173                                         path = m->hasPath(it->second);
174                                         //if the user has not given a path then, add inputdir. else leave path alone.
175                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
176                                 }
177                 
178                 it = parameters.find("count");
179                                 //user has given a template file
180                                 if(it != parameters.end()){ 
181                                         path = m->hasPath(it->second);
182                                         //if the user has not given a path then, add inputdir. else leave path alone.
183                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
184                                 }
185                         }
186
187                         
188                         //if the user changes the output directory command factory will send this info to us in the output parameter 
189                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
190                         
191                         //check for required parameters
192                         listfile = validParameter.validFile(parameters, "list", true);
193                         if (listfile == "not found") {                          
194                                 //if there is a current list file, use it
195                                 listfile = m->getListFile(); 
196                                 if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
197                                 else {  m->mothurOut("You have no current listfile and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
198                         }
199                         else if (listfile == "not open") { abort = true; }      
200                         else { m->setListFile(listfile); }
201                         
202                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
203                         if (taxfile == "not found") {  //if there is a current list file, use it
204                                 taxfile = m->getTaxonomyFile(); 
205                                 if (taxfile != "") {  m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
206                                 else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
207                         }
208                         else if (taxfile == "not open") { abort = true; }
209                         else { m->setTaxonomyFile(taxfile); }
210                         
211                         refTaxonomy = validParameter.validFile(parameters, "reftaxonomy", true);
212                         if (refTaxonomy == "not found") { refTaxonomy = ""; m->mothurOut("reftaxonomy is not required, but if given will keep the rankIDs in the summary file static."); m->mothurOutEndLine(); }
213                         else if (refTaxonomy == "not open") { abort = true; }
214         
215                         namefile = validParameter.validFile(parameters, "name", true);
216                         if (namefile == "not open") { namefile = ""; abort = true; }    
217                         else if (namefile == "not found") { namefile = ""; }
218                         else { m->setNameFile(namefile); }
219                         
220                         groupfile = validParameter.validFile(parameters, "group", true);
221                         if (groupfile == "not open") { abort = true; }  
222                         else if (groupfile == "not found") { groupfile = ""; }
223                         else { m->setGroupFile(groupfile); }
224             
225             countfile = validParameter.validFile(parameters, "count", true);
226                         if (countfile == "not open") { countfile = ""; abort = true; }
227                         else if (countfile == "not found") { countfile = "";  } 
228                         else { m->setCountTableFile(countfile); }
229             
230             if ((namefile != "") && (countfile != "")) {
231                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
232             }
233                         
234             if ((groupfile != "") && (countfile != "")) {
235                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
236             }
237
238                         
239                         //check for optional parameter and set defaults
240                         // ...at some point should added some additional type checking...
241                         label = validParameter.validFile(parameters, "label", false);                   
242                         if (label == "not found") { label = ""; allLines = 1;  }
243                         else { 
244                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
245                                 else { allLines = 1;  }
246                         }
247                         
248                         basis = validParameter.validFile(parameters, "basis", false);
249                         if (basis == "not found") { basis = "otu"; }    
250                         
251                         if ((basis != "otu") && (basis != "sequence")) { m->mothurOut("Invalid option for basis. basis options are otu and sequence, using otu."); m->mothurOutEndLine(); }
252                         
253                         string temp = validParameter.validFile(parameters, "cutoff", false);                    if (temp == "not found") { temp = "51"; }
254                         m->mothurConvert(temp, cutoff); 
255                         
256                         temp = validParameter.validFile(parameters, "probs", false);                                    if (temp == "not found"){       temp = "true";                  }
257                         probs = m->isTrue(temp);
258                         
259                         
260                         if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true;  }
261                         
262             if (countfile == "") {
263                 if (namefile == ""){
264                     vector<string> files; files.push_back(taxfile);
265                     parser.getNameFile(files);
266                 }
267             }
268                         
269                 }
270         }
271         catch(exception& e) {
272                 m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
273                 exit(1);
274         }
275 }
276 //**********************************************************************************************************************
277
278 int ClassifyOtuCommand::execute(){
279         try {
280         
281                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
282                 
283                 //if user gave a namesfile then use it
284                 if (namefile != "")     {       m->readNames(namefile, nameMap, true);  }
285         if (groupfile != "")    {   groupMap = new GroupMap(groupfile);  groupMap->readMap(); }
286         else { groupMap = NULL;  }
287         if (countfile != "") {  ct = new CountTable(); ct->readTable(countfile);    }
288         else {  ct = NULL;    }
289                 
290                 //read taxonomy file and save in map for easy access in building bin trees
291                 m->readTax(taxfile, taxMap);
292                 
293                 if (m->control_pressed) { return 0; }
294                 
295                 input = new InputData(listfile, "list");
296                 list = input->getListVector();
297                 string lastLabel = list->getLabel();
298
299                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
300                 set<string> processedLabels;
301                 set<string> userLabels = labels;
302                 
303                 if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  }  return 0; }
304         
305                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
306                         
307                         if (allLines == 1 || labels.count(list->getLabel()) == 1){
308                         
309                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
310                                         process(list);
311                                         if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete input; delete list; return 0; }
312                                                                                 
313                                         processedLabels.insert(list->getLabel());
314                                         userLabels.erase(list->getLabel());
315                         }
316                         
317                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
318                                         string saveLabel = list->getLabel();
319                                         
320                                         delete list;
321                                         list = input->getListVector(lastLabel);
322                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
323                                         process(list);
324                                 
325                                         
326                                         if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; }  if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
327                                                                                 
328                                         processedLabels.insert(list->getLabel());
329                                         userLabels.erase(list->getLabel());
330                                         
331                                         //restore real lastlabel to save below
332                                         list->setLabel(saveLabel);
333                         }
334                         
335                         lastLabel = list->getLabel();
336         
337                         delete list;
338                         list = input->getListVector();
339                 }
340                 
341                 //output error messages about any remaining user labels
342                 bool needToRun = false;
343                 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {  
344                         m->mothurOut("Your file does not include the label " + (*it)); 
345                         if (processedLabels.count(lastLabel) != 1) {
346                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
347                                 needToRun = true;
348                         }else {
349                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
350                         }
351                 }
352                 
353                 //run last label if you need to
354                 if (needToRun == true)  {
355                         if (list != NULL) {     delete list;    }
356                         list = input->getListVector(lastLabel);
357                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
358                         
359                         process(list);
360                         delete list;
361                         
362                         if (m->control_pressed) { outputTypes.clear();  if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
363                 }
364                 
365                 delete input;  
366         if (groupMap != NULL) { delete groupMap; }
367         if (ct != NULL) { delete ct; }
368                                 
369                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
370                 
371                 m->mothurOutEndLine();
372                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
373                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
374                 m->mothurOutEndLine();
375                 
376                 return 0;
377         }
378         catch(exception& e) {
379                 m->errorOut(e, "ClassifyOtuCommand", "execute");
380                 exit(1);
381         }
382 }
383 //**********************************************************************************************************************
384 vector<string> ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* thisList, int& size, string& conTax) {
385         try{
386                 conTax = "";
387                 vector<string> names;
388                 vector<string> allNames;
389                 map<string, string>::iterator it;
390                 map<string, string>::iterator it2;
391
392                 //parse names into vector
393                 string binnames = thisList->get(bin);
394                 m->splitAtComma(binnames, names);
395
396                 //create a tree containing sequences from this bin
397                 PhyloTree* phylo = new PhyloTree();
398                 
399                 size = 0;
400                 for (int i = 0; i < names.size(); i++) {
401         
402                         //if namesfile include the names
403                         if (namefile != "") {
404         
405                                 //is this sequence in the name file - namemap maps seqName -> repSeqName
406                                 it2 = nameMap.find(names[i]);
407                                 
408                                 if (it2 == nameMap.end()) { //this name is not in name file, skip it
409                                         m->mothurOut(names[i] + " is not in your name file.  I will not include it in the consensus."); m->mothurOutEndLine();
410                                 }else{
411                                         
412                                         //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
413                                         it = taxMap.find(it2->second);
414                         
415                                         if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
416                                         
417                                                 if (names[i] != it2->second) { m->mothurOut(names[i] + " is represented by " +  it2->second + " and is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
418                                                 else {  m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
419                                         }else{
420                                 
421                                                 //add seq to tree
422                                                 phylo->addSeqToTree(names[i], it->second);
423                                                 size++;
424                                                 allNames.push_back(names[i]);
425                                         }
426                                 }
427                                 
428                         }else{
429                                 //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
430                                 it = taxMap.find(names[i]);
431                 
432                                 if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
433                                         m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine();
434                                 }else{
435                     if (countfile != "") {
436                         int numDups = ct->getNumSeqs(names[i]); 
437                         for (int j = 0; j < numDups; j++) {  phylo->addSeqToTree(names[i], it->second);  }
438                         size += numDups;
439                     }else{
440                                         //add seq to tree
441                         phylo->addSeqToTree(names[i], it->second);
442                         size++;  
443                     }
444                     allNames.push_back(names[i]);
445                                 }
446                         }
447
448                         
449                         if (m->control_pressed) { delete phylo; return allNames; }
450                         
451                 }
452                 
453                 //build tree
454                 phylo->assignHeirarchyIDs(0);
455                 
456                 TaxNode currentNode = phylo->get(0);
457                 int myLevel = 0;        
458                 //at each level
459                 while (currentNode.children.size() != 0) { //you still have more to explore
460                 
461                         TaxNode bestChild;
462                         int bestChildSize = 0;
463                         
464                         //go through children
465                         for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
466                                 
467                                 TaxNode temp = phylo->get(itChild->second);
468                                 
469                                 //select child with largest accesions - most seqs assigned to it
470                                 if (temp.accessions.size() > bestChildSize) {
471                                         bestChild = phylo->get(itChild->second);
472                                         bestChildSize = temp.accessions.size();
473                                 }
474                                 
475                         }
476             
477             //phylotree adds an extra unknown so we want to remove that
478             if (bestChild.name == "unknown") { bestChildSize--; }
479                                 
480                         //is this taxonomy above cutoff
481                         int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
482                         
483                         if (consensusConfidence >= cutoff) { //if yes, add it
484                                 if (probs) {
485                                         conTax += bestChild.name + "(" + toString(consensusConfidence) + ");";
486                                 }else{
487                                         conTax += bestChild.name + ";";
488                                 }
489                                 myLevel++;
490                         }else{ //if no, quit
491                                 break;
492                         }
493                         
494                         //move down a level
495                         currentNode = bestChild;
496                 }
497                 
498                 if (myLevel != phylo->getMaxLevel()) {
499                         while (myLevel != phylo->getMaxLevel()) {
500                                 conTax += "unclassified;";
501                                 myLevel++;
502                         }
503                 }               
504                 if (conTax == "") {  conTax = "no_consensus;";  }
505                 
506                 delete phylo;   
507                 
508                 return allNames;
509                         
510         }
511         catch(exception& e) {
512                 m->errorOut(e, "ClassifyOtuCommand", "findConsensusTaxonomy");
513                 exit(1);
514         }
515 }
516
517 //**********************************************************************************************************************
518 int ClassifyOtuCommand::process(ListVector* processList) {
519         try{
520                 string conTax;
521                 int size;
522                 
523                 //create output file
524                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
525                                 
526                 ofstream out;
527                 string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." +getOutputFileNameTag("constaxonomy");
528                 m->openOutputFile(outputFile, out);
529                 outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
530                 
531                 ofstream outSum;
532                 string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." +getOutputFileNameTag("taxsummary");
533                 m->openOutputFile(outputSumFile, outSum);
534                 outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile);
535                 
536                 out << "OTU\tSize\tTaxonomy" << endl;
537                 
538                 PhyloSummary* taxaSum;
539         if (countfile != "") {
540             if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, ct);  }
541             else {  taxaSum = new PhyloSummary(ct); }
542                 }else {
543             if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, groupMap);  }
544             else {  taxaSum = new PhyloSummary(groupMap); }
545         }
546
547                 //for each bin in the list vector
548         string snumBins = toString(processList->getNumBins());
549                 for (int i = 0; i < processList->getNumBins(); i++) {
550                         
551                         if (m->control_pressed) { break; }
552                         
553                         vector<string> names;
554                         names = findConsensusTaxonomy(i, processList, size, conTax);
555                 
556                         if (m->control_pressed) { out.close();  return 0; }
557                         
558                         //output to new names file
559             string binLabel = "Otu";
560             string sbinNumber = toString(i+1);
561             if (sbinNumber.length() < snumBins.length()) { 
562                 int diff = snumBins.length() - sbinNumber.length();
563                 for (int h = 0; h < diff; h++) { binLabel += "0"; }
564             }
565             binLabel += sbinNumber;
566
567                         out << binLabel << '\t' << size << '\t' << conTax << endl;
568                         
569                         string noConfidenceConTax = conTax;
570                         m->removeConfidences(noConfidenceConTax);
571                         
572                         //add this bins taxonomy to summary
573                         if (basis == "sequence") {
574                                 for(int j = 0; j < names.size(); j++) {  taxaSum->addSeqToTree(names[j], noConfidenceConTax);  }
575                         }else { //otu
576                 map<string, bool> containsGroup; 
577                 if (countfile != "") {
578                     if (ct->hasGroupInfo()) {
579                         vector<string> mGroups = ct->getNamesOfGroups();
580                         for (int k = 0; k < names.size(); k++) {
581                             vector<int> counts = ct->getGroupCounts(names[k]);
582                             for (int h = 0; h < counts.size(); h++) {  
583                                 if (counts[h] != 0) {  containsGroup[mGroups[h]] = true; }
584                             }
585                         }
586                     }
587                 }else {
588                     if (groupfile != "") {
589                         vector<string> mGroups = groupMap->getNamesOfGroups();
590                         for (int j = 0; j < mGroups.size(); j++) { containsGroup[mGroups[j]] = false; }
591                         
592                         for (int k = 0; k < names.size(); k++) {
593                             //find out the sequences group
594                             string group = groupMap->getGroup(names[k]);
595                             
596                             if (group == "not found") {  m->mothurOut("[WARNING]: " + names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine();  }
597                             else {
598                                 containsGroup[group] = true;
599                             }
600                         }
601                     }
602                 }
603                                 taxaSum->addSeqToTree(noConfidenceConTax, containsGroup);
604                         }
605                 }
606
607                 out.close();
608                 
609                 //print summary file
610                 taxaSum->print(outSum);
611                 outSum.close();
612                 
613                 delete taxaSum;
614                 
615                 return 0;
616
617         }
618         catch(exception& e) {
619                 m->errorOut(e, "ClassifyOtuCommand", "process");
620                 exit(1);
621         }
622 }
623 /**************************************************************************************************/
624 string ClassifyOtuCommand::addUnclassifieds(string tax, int maxlevel) {
625         try{
626                 string newTax, taxon;
627                 int level = 0;
628                 
629                 //keep what you have counting the levels
630                 while (tax.find_first_of(';') != -1) {
631                         //get taxon
632                         taxon = tax.substr(0,tax.find_first_of(';'))+';';
633                         tax = tax.substr(tax.find_first_of(';')+1, tax.length());
634                         newTax += taxon;
635                         level++;
636                 }
637                 
638                 //add "unclassified" until you reach maxLevel
639                 while (level < maxlevel) {
640                         newTax += "unclassified;";
641                         level++;
642                 }
643                 
644                 return newTax;
645         }
646         catch(exception& e) {
647                 m->errorOut(e, "ClassifyOtuCommand", "addUnclassifieds");
648                 exit(1);
649         }
650 }
651 //**********************************************************************************************************************
652
653