2 * classifyotucommand.cpp
5 * Created by westcott on 6/1/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "classifyotucommand.h"
11 #include "phylotree.h"
12 #include "phylosummary.h"
13 #include "sharedutilities.h"
15 //**********************************************************************************************************************
16 vector<string> ClassifyOtuCommand::setParameters(){
18 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(plist);
19 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none","constaxonomy",false,true,true); parameters.push_back(ptaxonomy);
20 CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(preftaxonomy);
21 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
22 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
23 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
24 CommandParameter ppersample("persample", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppersample);
25 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
26 CommandParameter pbasis("basis", "Multiple", "otu-sequence", "otu", "", "", "","",false,false); parameters.push_back(pbasis);
27 CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "","",false,true); parameters.push_back(pcutoff);
28 CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pprobs);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
37 m->errorOut(e, "ClassifyOtuCommand", "setParameters");
41 //**********************************************************************************************************************
42 string ClassifyOtuCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The classify.otu command parameters are list, taxonomy, reftaxonomy, name, group, count, persample, cutoff, label, basis and probs. The taxonomy and list parameters are required unless you have a valid current file.\n";
46 helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. Providing it will keep the rankIDs in the summary file static.\n";
47 helpString += "The name parameter allows you add a names file with your taxonomy file.\n";
48 helpString += "The group parameter allows you provide a group file to use in creating the summary file breakdown.\n";
49 helpString += "The count parameter allows you add a count file associated with your list file. When using the count parameter mothur assumes your list file contains only uniques.\n";
50 helpString += "The basis parameter allows you indicate what you want the summary file to represent, options are otu and sequence. Default is otu.\n";
51 helpString += "For example consider the following basis=sequence could give Clostridiales 3 105 16 43 46, where 105 is the total number of sequences whose otu classified to Clostridiales.\n";
52 helpString += "16 is the number of sequences in the otus from groupA, 43 is the number of sequences in the otus from groupB, and 46 is the number of sequences in the otus from groupC.\n";
53 helpString += "Now for basis=otu could give Clostridiales 3 7 6 1 2, where 7 is the number of otus that classified to Clostridiales.\n";
54 helpString += "6 is the number of otus containing sequences from groupA, 1 is the number of otus containing sequences from groupB, and 2 is the number of otus containing sequences from groupC.\n";
55 helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n";
56 helpString += "The persample parameter allows you to find a consensus taxonomy for each group. Default=f\n";
57 helpString += "The default value for label is all labels in your inputfile.\n";
58 helpString += "The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy. The default is 51, meaning 51%. Cutoff cannot be below 51.\n";
59 helpString += "The probs parameter shuts off the outputting of the consensus confidence results. The default is true, meaning you want the confidence to be shown.\n";
60 helpString += "The classify.otu command should be in the following format: classify.otu(taxonomy=yourTaxonomyFile, list=yourListFile, name=yourNamesFile, label=yourLabels).\n";
61 helpString += "Example classify.otu(taxonomy=abrecovery.silva.full.taxonomy, list=abrecovery.fn.list, label=0.10).\n";
62 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n";
66 m->errorOut(e, "ClassifyOtuCommand", "getHelpString");
70 //**********************************************************************************************************************
71 string ClassifyOtuCommand::getOutputPattern(string type) {
75 if (type == "constaxonomy") { pattern = "[filename],[distance],cons.taxonomy"; }
76 else if (type == "taxsummary") { pattern = "[filename],[distance],cons.tax.summary"; }
77 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
82 m->errorOut(e, "ClassifyOtuCommand", "getOutputPattern");
86 //**********************************************************************************************************************
87 ClassifyOtuCommand::ClassifyOtuCommand(){
89 abort = true; calledHelp = true;
91 vector<string> tempOutNames;
92 outputTypes["constaxonomy"] = tempOutNames;
93 outputTypes["taxsummary"] = tempOutNames;
96 m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
101 //**********************************************************************************************************************
102 ClassifyOtuCommand::ClassifyOtuCommand(string option) {
104 abort = false; calledHelp = false;
108 //allow user to run help
109 if (option == "help") {
110 help(); abort = true; calledHelp = true;
111 }else if(option == "citation") { citation(); abort = true; calledHelp = true;}
113 vector<string> myArray = setParameters();
115 OptionParser parser(option);
116 map<string, string> parameters = parser.getParameters();
118 ValidParameters validParameter;
119 map<string, string>::iterator it;
121 //check to make sure all parameters are valid for command
122 for (it = parameters.begin(); it != parameters.end(); it++) {
123 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
126 //initialize outputTypes
127 vector<string> tempOutNames;
128 outputTypes["constaxonomy"] = tempOutNames;
129 outputTypes["taxsummary"] = tempOutNames;
131 //if the user changes the input directory command factory will send this info to us in the output parameter
132 string inputDir = validParameter.validFile(parameters, "inputdir", false);
133 if (inputDir == "not found"){ inputDir = ""; }
136 it = parameters.find("list");
137 //user has given a template file
138 if(it != parameters.end()){
139 path = m->hasPath(it->second);
140 //if the user has not given a path then, add inputdir. else leave path alone.
141 if (path == "") { parameters["list"] = inputDir + it->second; }
144 it = parameters.find("name");
145 //user has given a template file
146 if(it != parameters.end()){
147 path = m->hasPath(it->second);
148 //if the user has not given a path then, add inputdir. else leave path alone.
149 if (path == "") { parameters["name"] = inputDir + it->second; }
152 it = parameters.find("taxonomy");
153 //user has given a template file
154 if(it != parameters.end()){
155 path = m->hasPath(it->second);
156 //if the user has not given a path then, add inputdir. else leave path alone.
157 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
160 it = parameters.find("reftaxonomy");
161 //user has given a template file
162 if(it != parameters.end()){
163 path = m->hasPath(it->second);
164 //if the user has not given a path then, add inputdir. else leave path alone.
165 if (path == "") { parameters["reftaxonomy"] = inputDir + it->second; }
168 it = parameters.find("group");
169 //user has given a template file
170 if(it != parameters.end()){
171 path = m->hasPath(it->second);
172 //if the user has not given a path then, add inputdir. else leave path alone.
173 if (path == "") { parameters["group"] = inputDir + it->second; }
176 it = parameters.find("count");
177 //user has given a template file
178 if(it != parameters.end()){
179 path = m->hasPath(it->second);
180 //if the user has not given a path then, add inputdir. else leave path alone.
181 if (path == "") { parameters["count"] = inputDir + it->second; }
186 //if the user changes the output directory command factory will send this info to us in the output parameter
187 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
189 //check for required parameters
190 listfile = validParameter.validFile(parameters, "list", true);
191 if (listfile == "not found") {
192 //if there is a current list file, use it
193 listfile = m->getListFile();
194 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
195 else { m->mothurOut("You have no current listfile and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
197 else if (listfile == "not open") { abort = true; }
198 else { m->setListFile(listfile); }
200 taxfile = validParameter.validFile(parameters, "taxonomy", true);
201 if (taxfile == "not found") { //if there is a current list file, use it
202 taxfile = m->getTaxonomyFile();
203 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
204 else { m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
206 else if (taxfile == "not open") { abort = true; }
207 else { m->setTaxonomyFile(taxfile); }
209 refTaxonomy = validParameter.validFile(parameters, "reftaxonomy", true);
210 if (refTaxonomy == "not found") { refTaxonomy = ""; m->mothurOut("reftaxonomy is not required, but if given will keep the rankIDs in the summary file static."); m->mothurOutEndLine(); }
211 else if (refTaxonomy == "not open") { abort = true; }
213 namefile = validParameter.validFile(parameters, "name", true);
214 if (namefile == "not open") { namefile = ""; abort = true; }
215 else if (namefile == "not found") { namefile = ""; }
216 else { m->setNameFile(namefile); }
218 groupfile = validParameter.validFile(parameters, "group", true);
219 if (groupfile == "not open") { abort = true; }
220 else if (groupfile == "not found") { groupfile = ""; }
221 else { m->setGroupFile(groupfile); }
223 countfile = validParameter.validFile(parameters, "count", true);
224 if (countfile == "not open") { countfile = ""; abort = true; }
225 else if (countfile == "not found") { countfile = ""; }
226 else { m->setCountTableFile(countfile); }
228 if ((namefile != "") && (countfile != "")) {
229 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
232 if ((groupfile != "") && (countfile != "")) {
233 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
237 //check for optional parameter and set defaults
238 // ...at some point should added some additional type checking...
239 label = validParameter.validFile(parameters, "label", false);
240 if (label == "not found") { label = ""; allLines = 1; }
242 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
243 else { allLines = 1; }
246 basis = validParameter.validFile(parameters, "basis", false);
247 if (basis == "not found") { basis = "otu"; }
249 if ((basis != "otu") && (basis != "sequence")) { m->mothurOut("Invalid option for basis. basis options are otu and sequence, using otu."); m->mothurOutEndLine(); }
251 string temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "51"; }
252 m->mothurConvert(temp, cutoff);
254 temp = validParameter.validFile(parameters, "probs", false); if (temp == "not found"){ temp = "true"; }
255 probs = m->isTrue(temp);
257 temp = validParameter.validFile(parameters, "persample", false); if (temp == "not found"){ temp = "f"; }
258 persample = m->isTrue(temp);
260 if ((groupfile == "") && (countfile == "")) { if (persample) { m->mothurOut("persample is only valid with a group file, or count file with group information. Setting persample=f.\n"); persample = false; }
262 if (countfile != "") {
264 if (!cts.testGroups(countfile)) {
265 if (persample) { m->mothurOut("persample is only valid with a group file, or count file with group information. Setting persample=f.\n"); persample = false; }
269 if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true; }
271 if (countfile == "") {
273 vector<string> files; files.push_back(taxfile);
274 parser.getNameFile(files);
280 catch(exception& e) {
281 m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
285 //**********************************************************************************************************************
287 int ClassifyOtuCommand::execute(){
290 if (abort == true) { if (calledHelp) { return 0; } return 2; }
292 //if user gave a namesfile then use it
293 if (namefile != "") { m->readNames(namefile, nameMap, true); }
294 if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); groups = groupMap->getNamesOfGroups(); }
295 else { groupMap = NULL; }
296 if (countfile != "") { ct = new CountTable(); ct->readTable(countfile); if (ct->hasGroupInfo()) { groups = ct->getNamesOfGroups(); } }
299 //read taxonomy file and save in map for easy access in building bin trees
300 m->readTax(taxfile, taxMap);
302 if (m->control_pressed) { return 0; }
304 input = new InputData(listfile, "list");
305 list = input->getListVector();
306 string lastLabel = list->getLabel();
308 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
309 set<string> processedLabels;
310 set<string> userLabels = labels;
312 if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
314 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
316 if (allLines == 1 || labels.count(list->getLabel()) == 1){
318 m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
320 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete input; delete list; return 0; }
322 processedLabels.insert(list->getLabel());
323 userLabels.erase(list->getLabel());
326 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
327 string saveLabel = list->getLabel();
330 list = input->getListVector(lastLabel);
331 m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
335 if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete input; delete list; return 0; }
337 processedLabels.insert(list->getLabel());
338 userLabels.erase(list->getLabel());
340 //restore real lastlabel to save below
341 list->setLabel(saveLabel);
344 lastLabel = list->getLabel();
347 list = input->getListVector();
350 //output error messages about any remaining user labels
351 bool needToRun = false;
352 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {
353 m->mothurOut("Your file does not include the label " + (*it));
354 if (processedLabels.count(lastLabel) != 1) {
355 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
358 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
362 //run last label if you need to
363 if (needToRun == true) {
364 if (list != NULL) { delete list; }
365 list = input->getListVector(lastLabel);
366 m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
371 if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete input; delete list; return 0; }
375 if (groupMap != NULL) { delete groupMap; }
376 if (ct != NULL) { delete ct; }
378 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
380 m->mothurOutEndLine();
381 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
382 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
383 m->mothurOutEndLine();
387 catch(exception& e) {
388 m->errorOut(e, "ClassifyOtuCommand", "execute");
392 //**********************************************************************************************************************
393 vector<string> ClassifyOtuCommand::findConsensusTaxonomy(vector<string> names, int& size, string& conTax) {
396 vector<string> allNames;
397 map<string, string>::iterator it;
398 map<string, string>::iterator it2;
400 //create a tree containing sequences from this bin
401 PhyloTree* phylo = new PhyloTree();
404 for (int i = 0; i < names.size(); i++) {
406 //if namesfile include the names
407 if (namefile != "") {
409 //is this sequence in the name file - namemap maps seqName -> repSeqName
410 it2 = nameMap.find(names[i]);
412 if (it2 == nameMap.end()) { //this name is not in name file, skip it
413 m->mothurOut(names[i] + " is not in your name file. I will not include it in the consensus."); m->mothurOutEndLine();
416 //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
417 it = taxMap.find(it2->second);
419 if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
421 if (names[i] != it2->second) { m->mothurOut(names[i] + " is represented by " + it2->second + " and is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine(); }
422 else { m->mothurOut(names[i] + " is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine(); }
426 phylo->addSeqToTree(names[i], it->second);
428 allNames.push_back(names[i]);
433 //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
434 it = taxMap.find(names[i]);
436 if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
437 m->mothurOut(names[i] + " is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine();
439 if (countfile != "") {
440 int numDups = ct->getNumSeqs(names[i]);
441 for (int j = 0; j < numDups; j++) { phylo->addSeqToTree(names[i], it->second); }
445 phylo->addSeqToTree(names[i], it->second);
448 allNames.push_back(names[i]);
453 if (m->control_pressed) { delete phylo; return allNames; }
458 phylo->assignHeirarchyIDs(0);
460 TaxNode currentNode = phylo->get(0);
463 while (currentNode.children.size() != 0) { //you still have more to explore
466 int bestChildSize = 0;
468 //go through children
469 for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
471 TaxNode temp = phylo->get(itChild->second);
473 //select child with largest accesions - most seqs assigned to it
474 if (temp.accessions.size() > bestChildSize) {
475 bestChild = phylo->get(itChild->second);
476 bestChildSize = temp.accessions.size();
481 //phylotree adds an extra unknown so we want to remove that
482 if (bestChild.name == "unknown") { bestChildSize--; }
484 //is this taxonomy above cutoff
485 int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
487 if (consensusConfidence >= cutoff) { //if yes, add it
489 conTax += bestChild.name + "(" + toString(consensusConfidence) + ");";
491 conTax += bestChild.name + ";";
499 currentNode = bestChild;
502 if (myLevel != phylo->getMaxLevel()) {
503 while (myLevel != phylo->getMaxLevel()) {
504 conTax += "unclassified;";
508 if (conTax == "") { conTax = "no_consensus;"; }
515 catch(exception& e) {
516 m->errorOut(e, "ClassifyOtuCommand", "findConsensusTaxonomy");
521 //**********************************************************************************************************************
522 int ClassifyOtuCommand::process(ListVector* processList) {
528 if (outputDir == "") { outputDir += m->hasPath(listfile); }
531 map<string, string> variables;
532 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
533 variables["[distance]"] = processList->getLabel();
534 string outputFile = getOutputFileName("constaxonomy", variables);
535 m->openOutputFile(outputFile, out);
536 outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
539 string outputSumFile = getOutputFileName("taxsummary", variables);
540 m->openOutputFile(outputSumFile, outSum);
541 outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile);
543 out << "OTU\tSize\tTaxonomy" << endl;
545 PhyloSummary* taxaSum;
546 if (countfile != "") {
547 if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, ct); }
548 else { taxaSum = new PhyloSummary(ct); }
550 if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, groupMap); }
551 else { taxaSum = new PhyloSummary(groupMap); }
554 vector<ofstream*> outSums;
555 vector<ofstream*> outs;
556 vector<PhyloSummary*> taxaSums;
557 map<string, int> groupIndex;
559 for (int i = 0; i < groups.size(); i++) {
560 groupIndex[groups[i]] = i;
561 ofstream* temp = new ofstream();
562 variables["[distance]"] = processList->getLabel() + "." + groups[i];
563 string outputFile = getOutputFileName("constaxonomy", variables);
564 m->openOutputFile(outputFile, *temp);
565 (*temp) << "OTU\tSize\tTaxonomy" << endl;
566 outs.push_back(temp);
567 outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
569 ofstream* tempSum = new ofstream();
570 string outputSumFile = getOutputFileName("taxsummary", variables);
571 m->openOutputFile(outputSumFile, *tempSum);
572 outSums.push_back(tempSum);
573 outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile);
575 PhyloSummary* taxaSumt;
576 if (countfile != "") {
577 if (refTaxonomy != "") { taxaSumt = new PhyloSummary(refTaxonomy, ct); }
578 else { taxaSumt = new PhyloSummary(ct); }
580 if (refTaxonomy != "") { taxaSumt = new PhyloSummary(refTaxonomy, groupMap); }
581 else { taxaSumt = new PhyloSummary(groupMap); }
583 taxaSums.push_back(taxaSumt);
587 //for each bin in the list vector
588 string snumBins = toString(processList->getNumBins());
589 for (int i = 0; i < processList->getNumBins(); i++) {
591 if (m->control_pressed) { break; }
593 vector<string> names;
594 string binnames = processList->get(i);
595 vector<string> thisNames;
596 m->splitAtComma(binnames, thisNames);
598 names = findConsensusTaxonomy(thisNames, size, conTax);
600 if (m->control_pressed) { break; }
602 //output to new names file
603 string binLabel = "Otu";
604 string sbinNumber = toString(i+1);
605 if (sbinNumber.length() < snumBins.length()) {
606 int diff = snumBins.length() - sbinNumber.length();
607 for (int h = 0; h < diff; h++) { binLabel += "0"; }
609 binLabel += sbinNumber;
611 out << binLabel << '\t' << size << '\t' << conTax << endl;
613 string noConfidenceConTax = conTax;
614 m->removeConfidences(noConfidenceConTax);
616 //add this bins taxonomy to summary
617 if (basis == "sequence") {
618 for(int j = 0; j < names.size(); j++) {
620 if (countfile != "") { numReps = ct->getNumSeqs(names[j]); }
621 for(int k = 0; k < numReps; k++) { taxaSum->addSeqToTree(names[j], noConfidenceConTax); }
624 map<string, bool> containsGroup;
625 if (countfile != "") {
626 if (ct->hasGroupInfo()) {
627 vector<string> mGroups = ct->getNamesOfGroups();
628 for (int k = 0; k < names.size(); k++) {
629 vector<int> counts = ct->getGroupCounts(names[k]);
630 for (int h = 0; h < counts.size(); h++) {
631 if (counts[h] != 0) { containsGroup[mGroups[h]] = true; }
636 if (groupfile != "") {
637 vector<string> mGroups = groupMap->getNamesOfGroups();
638 for (int j = 0; j < mGroups.size(); j++) { containsGroup[mGroups[j]] = false; }
640 for (int k = 0; k < names.size(); k++) {
641 //find out the sequences group
642 string group = groupMap->getGroup(names[k]);
644 if (group == "not found") { m->mothurOut("[WARNING]: " + names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); }
646 containsGroup[group] = true;
651 taxaSum->addSeqToTree(noConfidenceConTax, containsGroup);
656 //divide names by group
657 map<string, vector<string> > parsedNames;
658 map<string, vector<string> >::iterator itParsed;
660 //parse names by group
661 for (int j = 0; j < names.size(); j++) {
662 if (groupfile != "") {
663 string group = groupMap->getGroup(names[j]);
664 itParsed = parsedNames.find(group);
666 if (itParsed != parsedNames.end()) { itParsed->second.push_back(names[j]); }
667 else { vector<string> tempNames; tempNames.push_back(names[j]); parsedNames[group] = tempNames; }
668 }else { //count file was used
669 vector<string> thisSeqsGroups = ct->getGroups(names[j]);
670 for (int k = 0; k < thisSeqsGroups.size(); k++) {
671 string group = thisSeqsGroups[k];
672 itParsed = parsedNames.find(group);
674 if (itParsed != parsedNames.end()) { itParsed->second.push_back(names[j]); }
675 else { vector<string> tempNames; tempNames.push_back(names[j]); parsedNames[group] = tempNames; }
680 for (itParsed = parsedNames.begin(); itParsed != parsedNames.end(); itParsed++) {
681 vector<string> theseNames = findConsensusTaxonomy(itParsed->second, size, conTax);
683 if (m->control_pressed) { break; }
685 //output to new names file
686 string binLabel = "Otu";
687 string sbinNumber = toString(i+1);
688 if (sbinNumber.length() < snumBins.length()) {
689 int diff = snumBins.length() - sbinNumber.length();
690 for (int h = 0; h < diff; h++) { binLabel += "0"; }
692 binLabel += sbinNumber;
694 (*outs[groupIndex[itParsed->first]]) << binLabel << '\t' << size << '\t' << conTax << endl;
696 string noConfidenceConTax = conTax;
697 m->removeConfidences(noConfidenceConTax);
699 //add this bins taxonomy to summary
700 if (basis == "sequence") {
701 for(int j = 0; j < theseNames.size(); j++) {
703 if (countfile != "") { numReps = ct->getGroupCount(theseNames[j], itParsed->first); } //get num seqs for this seq from this group
704 for(int k = 0; k < numReps; k++) { (taxaSums[groupIndex[itParsed->first]])->addSeqToTree(theseNames[j], noConfidenceConTax); }
707 map<string, bool> containsGroup;
708 containsGroup[itParsed->first] = true;
709 (taxaSums[groupIndex[itParsed->first]])->addSeqToTree(noConfidenceConTax, containsGroup);
718 taxaSum->print(outSum);
722 for (int i = 0; i < groups.size(); i++) {
724 taxaSums[i]->print(*outSums[i]);
725 (*outSums[i]).close();
737 catch(exception& e) {
738 m->errorOut(e, "ClassifyOtuCommand", "process");
742 /**************************************************************************************************/
743 string ClassifyOtuCommand::addUnclassifieds(string tax, int maxlevel) {
745 string newTax, taxon;
748 //keep what you have counting the levels
749 while (tax.find_first_of(';') != -1) {
751 taxon = tax.substr(0,tax.find_first_of(';'))+';';
752 tax = tax.substr(tax.find_first_of(';')+1, tax.length());
757 //add "unclassified" until you reach maxLevel
758 while (level < maxlevel) {
759 newTax += "unclassified;";
765 catch(exception& e) {
766 m->errorOut(e, "ClassifyOtuCommand", "addUnclassifieds");
770 //**********************************************************************************************************************