2 * classifyotucommand.cpp
5 * Created by westcott on 6/1/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "classifyotucommand.h"
11 #include "phylotree.h"
12 #include "phylosummary.h"
13 #include "sharedutilities.h"
15 //**********************************************************************************************************************
16 vector<string> ClassifyOtuCommand::setParameters(){
18 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(plist);
19 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none","constaxonomy",false,true,true); parameters.push_back(ptaxonomy);
20 CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(preftaxonomy);
21 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
22 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
23 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
24 CommandParameter ppersample("persample", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppersample);
25 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
26 CommandParameter pbasis("basis", "Multiple", "otu-sequence", "otu", "", "", "","",false,false); parameters.push_back(pbasis);
27 CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "","",false,true); parameters.push_back(pcutoff);
28 CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pprobs);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
37 m->errorOut(e, "ClassifyOtuCommand", "setParameters");
41 //**********************************************************************************************************************
42 string ClassifyOtuCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The classify.otu command parameters are list, taxonomy, reftaxonomy, name, group, count, cutoff, label, basis and probs. The taxonomy and list parameters are required unless you have a valid current file.\n";
46 helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. Providing it will keep the rankIDs in the summary file static.\n";
47 helpString += "The name parameter allows you add a names file with your taxonomy file.\n";
48 helpString += "The group parameter allows you provide a group file to use in creating the summary file breakdown.\n";
49 helpString += "The count parameter allows you add a count file associated with your list file. When using the count parameter mothur assumes your list file contains only uniques.\n";
50 helpString += "The basis parameter allows you indicate what you want the summary file to represent, options are otu and sequence. Default is otu.\n";
51 helpString += "For example consider the following basis=sequence could give Clostridiales 3 105 16 43 46, where 105 is the total number of sequences whose otu classified to Clostridiales.\n";
52 helpString += "16 is the number of sequences in the otus from groupA, 43 is the number of sequences in the otus from groupB, and 46 is the number of sequences in the otus from groupC.\n";
53 helpString += "Now for basis=otu could give Clostridiales 3 7 6 1 2, where 7 is the number of otus that classified to Clostridiales.\n";
54 helpString += "6 is the number of otus containing sequences from groupA, 1 is the number of otus containing sequences from groupB, and 2 is the number of otus containing sequences from groupC.\n";
55 helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n";
56 helpString += "The default value for label is all labels in your inputfile.\n";
57 helpString += "The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy. The default is 51, meaning 51%. Cutoff cannot be below 51.\n";
58 helpString += "The probs parameter shuts off the outputting of the consensus confidence results. The default is true, meaning you want the confidence to be shown.\n";
59 helpString += "The classify.otu command should be in the following format: classify.otu(taxonomy=yourTaxonomyFile, list=yourListFile, name=yourNamesFile, label=yourLabels).\n";
60 helpString += "Example classify.otu(taxonomy=abrecovery.silva.full.taxonomy, list=abrecovery.fn.list, label=0.10).\n";
61 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n";
65 m->errorOut(e, "ClassifyOtuCommand", "getHelpString");
69 //**********************************************************************************************************************
70 string ClassifyOtuCommand::getOutputPattern(string type) {
74 if (type == "constaxonomy") { pattern = "[filename],[distance],cons.taxonomy"; }
75 else if (type == "taxsummary") { pattern = "[filename],[distance],cons.tax.summary"; }
76 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
81 m->errorOut(e, "ClassifyOtuCommand", "getOutputPattern");
85 //**********************************************************************************************************************
86 ClassifyOtuCommand::ClassifyOtuCommand(){
88 abort = true; calledHelp = true;
90 vector<string> tempOutNames;
91 outputTypes["constaxonomy"] = tempOutNames;
92 outputTypes["taxsummary"] = tempOutNames;
95 m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
100 //**********************************************************************************************************************
101 ClassifyOtuCommand::ClassifyOtuCommand(string option) {
103 abort = false; calledHelp = false;
107 //allow user to run help
108 if (option == "help") {
109 help(); abort = true; calledHelp = true;
110 }else if(option == "citation") { citation(); abort = true; calledHelp = true;}
112 vector<string> myArray = setParameters();
114 OptionParser parser(option);
115 map<string, string> parameters = parser.getParameters();
117 ValidParameters validParameter;
118 map<string, string>::iterator it;
120 //check to make sure all parameters are valid for command
121 for (it = parameters.begin(); it != parameters.end(); it++) {
122 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
125 //initialize outputTypes
126 vector<string> tempOutNames;
127 outputTypes["constaxonomy"] = tempOutNames;
128 outputTypes["taxsummary"] = tempOutNames;
130 //if the user changes the input directory command factory will send this info to us in the output parameter
131 string inputDir = validParameter.validFile(parameters, "inputdir", false);
132 if (inputDir == "not found"){ inputDir = ""; }
135 it = parameters.find("list");
136 //user has given a template file
137 if(it != parameters.end()){
138 path = m->hasPath(it->second);
139 //if the user has not given a path then, add inputdir. else leave path alone.
140 if (path == "") { parameters["list"] = inputDir + it->second; }
143 it = parameters.find("name");
144 //user has given a template file
145 if(it != parameters.end()){
146 path = m->hasPath(it->second);
147 //if the user has not given a path then, add inputdir. else leave path alone.
148 if (path == "") { parameters["name"] = inputDir + it->second; }
151 it = parameters.find("taxonomy");
152 //user has given a template file
153 if(it != parameters.end()){
154 path = m->hasPath(it->second);
155 //if the user has not given a path then, add inputdir. else leave path alone.
156 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
159 it = parameters.find("reftaxonomy");
160 //user has given a template file
161 if(it != parameters.end()){
162 path = m->hasPath(it->second);
163 //if the user has not given a path then, add inputdir. else leave path alone.
164 if (path == "") { parameters["reftaxonomy"] = inputDir + it->second; }
167 it = parameters.find("group");
168 //user has given a template file
169 if(it != parameters.end()){
170 path = m->hasPath(it->second);
171 //if the user has not given a path then, add inputdir. else leave path alone.
172 if (path == "") { parameters["group"] = inputDir + it->second; }
175 it = parameters.find("count");
176 //user has given a template file
177 if(it != parameters.end()){
178 path = m->hasPath(it->second);
179 //if the user has not given a path then, add inputdir. else leave path alone.
180 if (path == "") { parameters["count"] = inputDir + it->second; }
185 //if the user changes the output directory command factory will send this info to us in the output parameter
186 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
188 //check for required parameters
189 listfile = validParameter.validFile(parameters, "list", true);
190 if (listfile == "not found") {
191 //if there is a current list file, use it
192 listfile = m->getListFile();
193 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
194 else { m->mothurOut("You have no current listfile and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
196 else if (listfile == "not open") { abort = true; }
197 else { m->setListFile(listfile); }
199 taxfile = validParameter.validFile(parameters, "taxonomy", true);
200 if (taxfile == "not found") { //if there is a current list file, use it
201 taxfile = m->getTaxonomyFile();
202 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
203 else { m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
205 else if (taxfile == "not open") { abort = true; }
206 else { m->setTaxonomyFile(taxfile); }
208 refTaxonomy = validParameter.validFile(parameters, "reftaxonomy", true);
209 if (refTaxonomy == "not found") { refTaxonomy = ""; m->mothurOut("reftaxonomy is not required, but if given will keep the rankIDs in the summary file static."); m->mothurOutEndLine(); }
210 else if (refTaxonomy == "not open") { abort = true; }
212 namefile = validParameter.validFile(parameters, "name", true);
213 if (namefile == "not open") { namefile = ""; abort = true; }
214 else if (namefile == "not found") { namefile = ""; }
215 else { m->setNameFile(namefile); }
217 groupfile = validParameter.validFile(parameters, "group", true);
218 if (groupfile == "not open") { abort = true; }
219 else if (groupfile == "not found") { groupfile = ""; }
220 else { m->setGroupFile(groupfile); }
222 countfile = validParameter.validFile(parameters, "count", true);
223 if (countfile == "not open") { countfile = ""; abort = true; }
224 else if (countfile == "not found") { countfile = ""; }
225 else { m->setCountTableFile(countfile); }
227 if ((namefile != "") && (countfile != "")) {
228 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
231 if ((groupfile != "") && (countfile != "")) {
232 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
236 //check for optional parameter and set defaults
237 // ...at some point should added some additional type checking...
238 label = validParameter.validFile(parameters, "label", false);
239 if (label == "not found") { label = ""; allLines = 1; }
241 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
242 else { allLines = 1; }
245 basis = validParameter.validFile(parameters, "basis", false);
246 if (basis == "not found") { basis = "otu"; }
248 if ((basis != "otu") && (basis != "sequence")) { m->mothurOut("Invalid option for basis. basis options are otu and sequence, using otu."); m->mothurOutEndLine(); }
250 string temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "51"; }
251 m->mothurConvert(temp, cutoff);
253 temp = validParameter.validFile(parameters, "probs", false); if (temp == "not found"){ temp = "true"; }
254 probs = m->isTrue(temp);
256 temp = validParameter.validFile(parameters, "persample", false); if (temp == "not found"){ temp = "f"; }
257 persample = m->isTrue(temp);
259 if ((groupfile == "") && (countfile == "")) { if (persample) { m->mothurOut("persample is only valid with a group file, or count file with group information. Setting persample=f.\n"); persample = false; }
261 if (countfile != "") {
263 if (!cts.testGroups(countfile)) {
264 if (persample) { m->mothurOut("persample is only valid with a group file, or count file with group information. Setting persample=f.\n"); persample = false; }
268 if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true; }
270 if (countfile == "") {
272 vector<string> files; files.push_back(taxfile);
273 parser.getNameFile(files);
279 catch(exception& e) {
280 m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
284 //**********************************************************************************************************************
286 int ClassifyOtuCommand::execute(){
289 if (abort == true) { if (calledHelp) { return 0; } return 2; }
291 //if user gave a namesfile then use it
292 if (namefile != "") { m->readNames(namefile, nameMap, true); }
293 if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); groups = groupMap->getNamesOfGroups(); }
294 else { groupMap = NULL; }
295 if (countfile != "") { ct = new CountTable(); ct->readTable(countfile); if (ct->hasGroupInfo()) { groups = ct->getNamesOfGroups(); } }
298 //read taxonomy file and save in map for easy access in building bin trees
299 m->readTax(taxfile, taxMap);
301 if (m->control_pressed) { return 0; }
303 input = new InputData(listfile, "list");
304 list = input->getListVector();
305 string lastLabel = list->getLabel();
307 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
308 set<string> processedLabels;
309 set<string> userLabels = labels;
311 if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
313 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
315 if (allLines == 1 || labels.count(list->getLabel()) == 1){
317 m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
319 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete input; delete list; return 0; }
321 processedLabels.insert(list->getLabel());
322 userLabels.erase(list->getLabel());
325 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
326 string saveLabel = list->getLabel();
329 list = input->getListVector(lastLabel);
330 m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
334 if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete input; delete list; return 0; }
336 processedLabels.insert(list->getLabel());
337 userLabels.erase(list->getLabel());
339 //restore real lastlabel to save below
340 list->setLabel(saveLabel);
343 lastLabel = list->getLabel();
346 list = input->getListVector();
349 //output error messages about any remaining user labels
350 bool needToRun = false;
351 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {
352 m->mothurOut("Your file does not include the label " + (*it));
353 if (processedLabels.count(lastLabel) != 1) {
354 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
357 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
361 //run last label if you need to
362 if (needToRun == true) {
363 if (list != NULL) { delete list; }
364 list = input->getListVector(lastLabel);
365 m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
370 if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete input; delete list; return 0; }
374 if (groupMap != NULL) { delete groupMap; }
375 if (ct != NULL) { delete ct; }
377 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
379 m->mothurOutEndLine();
380 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
381 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
382 m->mothurOutEndLine();
386 catch(exception& e) {
387 m->errorOut(e, "ClassifyOtuCommand", "execute");
391 //**********************************************************************************************************************
392 vector<string> ClassifyOtuCommand::findConsensusTaxonomy(vector<string> names, int& size, string& conTax) {
395 vector<string> allNames;
396 map<string, string>::iterator it;
397 map<string, string>::iterator it2;
399 //create a tree containing sequences from this bin
400 PhyloTree* phylo = new PhyloTree();
403 for (int i = 0; i < names.size(); i++) {
405 //if namesfile include the names
406 if (namefile != "") {
408 //is this sequence in the name file - namemap maps seqName -> repSeqName
409 it2 = nameMap.find(names[i]);
411 if (it2 == nameMap.end()) { //this name is not in name file, skip it
412 m->mothurOut(names[i] + " is not in your name file. I will not include it in the consensus."); m->mothurOutEndLine();
415 //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
416 it = taxMap.find(it2->second);
418 if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
420 if (names[i] != it2->second) { m->mothurOut(names[i] + " is represented by " + it2->second + " and is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine(); }
421 else { m->mothurOut(names[i] + " is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine(); }
425 phylo->addSeqToTree(names[i], it->second);
427 allNames.push_back(names[i]);
432 //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
433 it = taxMap.find(names[i]);
435 if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
436 m->mothurOut(names[i] + " is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine();
438 if (countfile != "") {
439 int numDups = ct->getNumSeqs(names[i]);
440 for (int j = 0; j < numDups; j++) { phylo->addSeqToTree(names[i], it->second); }
444 phylo->addSeqToTree(names[i], it->second);
447 allNames.push_back(names[i]);
452 if (m->control_pressed) { delete phylo; return allNames; }
457 phylo->assignHeirarchyIDs(0);
459 TaxNode currentNode = phylo->get(0);
462 while (currentNode.children.size() != 0) { //you still have more to explore
465 int bestChildSize = 0;
467 //go through children
468 for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
470 TaxNode temp = phylo->get(itChild->second);
472 //select child with largest accesions - most seqs assigned to it
473 if (temp.accessions.size() > bestChildSize) {
474 bestChild = phylo->get(itChild->second);
475 bestChildSize = temp.accessions.size();
480 //phylotree adds an extra unknown so we want to remove that
481 if (bestChild.name == "unknown") { bestChildSize--; }
483 //is this taxonomy above cutoff
484 int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
486 if (consensusConfidence >= cutoff) { //if yes, add it
488 conTax += bestChild.name + "(" + toString(consensusConfidence) + ");";
490 conTax += bestChild.name + ";";
498 currentNode = bestChild;
501 if (myLevel != phylo->getMaxLevel()) {
502 while (myLevel != phylo->getMaxLevel()) {
503 conTax += "unclassified;";
507 if (conTax == "") { conTax = "no_consensus;"; }
514 catch(exception& e) {
515 m->errorOut(e, "ClassifyOtuCommand", "findConsensusTaxonomy");
520 //**********************************************************************************************************************
521 int ClassifyOtuCommand::process(ListVector* processList) {
527 if (outputDir == "") { outputDir += m->hasPath(listfile); }
530 map<string, string> variables;
531 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
532 variables["[distance]"] = processList->getLabel();
533 string outputFile = getOutputFileName("constaxonomy", variables);
534 m->openOutputFile(outputFile, out);
535 outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
538 string outputSumFile = getOutputFileName("taxsummary", variables);
539 m->openOutputFile(outputSumFile, outSum);
540 outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile);
542 out << "OTU\tSize\tTaxonomy" << endl;
544 PhyloSummary* taxaSum;
545 if (countfile != "") {
546 if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, ct); }
547 else { taxaSum = new PhyloSummary(ct); }
549 if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, groupMap); }
550 else { taxaSum = new PhyloSummary(groupMap); }
553 vector<ofstream*> outSums;
554 vector<ofstream*> outs;
555 vector<PhyloSummary*> taxaSums;
556 map<string, int> groupIndex;
558 for (int i = 0; i < groups.size(); i++) {
559 groupIndex[groups[i]] = i;
560 ofstream* temp = new ofstream();
561 variables["[distance]"] = processList->getLabel() + "." + groups[i];
562 string outputFile = getOutputFileName("constaxonomy", variables);
563 m->openOutputFile(outputFile, *temp);
564 (*temp) << "OTU\tSize\tTaxonomy" << endl;
565 outs.push_back(temp);
566 outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
568 ofstream* tempSum = new ofstream();
569 string outputSumFile = getOutputFileName("taxsummary", variables);
570 m->openOutputFile(outputSumFile, *tempSum);
571 outSums.push_back(tempSum);
572 outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile);
574 PhyloSummary* taxaSumt;
575 if (countfile != "") {
576 if (refTaxonomy != "") { taxaSumt = new PhyloSummary(refTaxonomy, ct); }
577 else { taxaSumt = new PhyloSummary(ct); }
579 if (refTaxonomy != "") { taxaSumt = new PhyloSummary(refTaxonomy, groupMap); }
580 else { taxaSumt = new PhyloSummary(groupMap); }
582 taxaSums.push_back(taxaSumt);
586 //for each bin in the list vector
587 string snumBins = toString(processList->getNumBins());
588 for (int i = 0; i < processList->getNumBins(); i++) {
590 if (m->control_pressed) { break; }
592 vector<string> names;
593 string binnames = processList->get(i);
594 vector<string> thisNames;
595 m->splitAtComma(binnames, thisNames);
597 names = findConsensusTaxonomy(thisNames, size, conTax);
599 if (m->control_pressed) { break; }
601 //output to new names file
602 string binLabel = "Otu";
603 string sbinNumber = toString(i+1);
604 if (sbinNumber.length() < snumBins.length()) {
605 int diff = snumBins.length() - sbinNumber.length();
606 for (int h = 0; h < diff; h++) { binLabel += "0"; }
608 binLabel += sbinNumber;
610 out << binLabel << '\t' << size << '\t' << conTax << endl;
612 string noConfidenceConTax = conTax;
613 m->removeConfidences(noConfidenceConTax);
615 //add this bins taxonomy to summary
616 if (basis == "sequence") {
617 for(int j = 0; j < names.size(); j++) {
619 if (countfile != "") { numReps = ct->getNumSeqs(names[j]); }
620 for(int k = 0; k < numReps; k++) { taxaSum->addSeqToTree(names[j], noConfidenceConTax); }
623 map<string, bool> containsGroup;
624 if (countfile != "") {
625 if (ct->hasGroupInfo()) {
626 vector<string> mGroups = ct->getNamesOfGroups();
627 for (int k = 0; k < names.size(); k++) {
628 vector<int> counts = ct->getGroupCounts(names[k]);
629 for (int h = 0; h < counts.size(); h++) {
630 if (counts[h] != 0) { containsGroup[mGroups[h]] = true; }
635 if (groupfile != "") {
636 vector<string> mGroups = groupMap->getNamesOfGroups();
637 for (int j = 0; j < mGroups.size(); j++) { containsGroup[mGroups[j]] = false; }
639 for (int k = 0; k < names.size(); k++) {
640 //find out the sequences group
641 string group = groupMap->getGroup(names[k]);
643 if (group == "not found") { m->mothurOut("[WARNING]: " + names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); }
645 containsGroup[group] = true;
650 taxaSum->addSeqToTree(noConfidenceConTax, containsGroup);
655 //divide names by group
656 map<string, vector<string> > parsedNames;
657 map<string, vector<string> >::iterator itParsed;
659 //parse names by group
660 for (int j = 0; j < names.size(); j++) {
661 if (groupfile != "") {
662 string group = groupMap->getGroup(names[j]);
663 itParsed = parsedNames.find(group);
665 if (itParsed != parsedNames.end()) { itParsed->second.push_back(names[j]); }
666 else { vector<string> tempNames; tempNames.push_back(names[j]); parsedNames[group] = tempNames; }
667 }else { //count file was used
668 vector<string> thisSeqsGroups = ct->getGroups(names[j]);
669 for (int k = 0; k < thisSeqsGroups.size(); k++) {
670 string group = thisSeqsGroups[k];
671 itParsed = parsedNames.find(group);
673 if (itParsed != parsedNames.end()) { itParsed->second.push_back(names[j]); }
674 else { vector<string> tempNames; tempNames.push_back(names[j]); parsedNames[group] = tempNames; }
679 for (itParsed = parsedNames.begin(); itParsed != parsedNames.end(); itParsed++) {
680 vector<string> theseNames = findConsensusTaxonomy(itParsed->second, size, conTax);
682 if (m->control_pressed) { break; }
684 //output to new names file
685 string binLabel = "Otu";
686 string sbinNumber = toString(i+1);
687 if (sbinNumber.length() < snumBins.length()) {
688 int diff = snumBins.length() - sbinNumber.length();
689 for (int h = 0; h < diff; h++) { binLabel += "0"; }
691 binLabel += sbinNumber;
693 (*outs[groupIndex[itParsed->first]]) << binLabel << '\t' << size << '\t' << conTax << endl;
695 string noConfidenceConTax = conTax;
696 m->removeConfidences(noConfidenceConTax);
698 //add this bins taxonomy to summary
699 if (basis == "sequence") {
700 for(int j = 0; j < theseNames.size(); j++) {
702 if (countfile != "") { numReps = ct->getGroupCount(theseNames[j], itParsed->first); } //get num seqs for this seq from this group
703 for(int k = 0; k < numReps; k++) { (taxaSums[groupIndex[itParsed->first]])->addSeqToTree(theseNames[j], noConfidenceConTax); }
706 map<string, bool> containsGroup;
707 containsGroup[itParsed->first] = true;
708 (taxaSums[groupIndex[itParsed->first]])->addSeqToTree(noConfidenceConTax, containsGroup);
717 taxaSum->print(outSum);
721 for (int i = 0; i < groups.size(); i++) {
723 taxaSums[i]->print(*outSums[i]);
724 (*outSums[i]).close();
736 catch(exception& e) {
737 m->errorOut(e, "ClassifyOtuCommand", "process");
741 /**************************************************************************************************/
742 string ClassifyOtuCommand::addUnclassifieds(string tax, int maxlevel) {
744 string newTax, taxon;
747 //keep what you have counting the levels
748 while (tax.find_first_of(';') != -1) {
750 taxon = tax.substr(0,tax.find_first_of(';'))+';';
751 tax = tax.substr(tax.find_first_of(';')+1, tax.length());
756 //add "unclassified" until you reach maxLevel
757 while (level < maxlevel) {
758 newTax += "unclassified;";
764 catch(exception& e) {
765 m->errorOut(e, "ClassifyOtuCommand", "addUnclassifieds");
769 //**********************************************************************************************************************