2 * classifyotucommand.cpp
5 * Created by westcott on 6/1/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "classifyotucommand.h"
11 #include "phylotree.h"
12 #include "phylosummary.h"
13 #include "sharedutilities.h"
15 //**********************************************************************************************************************
16 vector<string> ClassifyOtuCommand::setParameters(){
18 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
19 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy);
20 CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(preftaxonomy);
21 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
22 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
23 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
24 CommandParameter ppersample("persample", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ppersample);
25 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
26 CommandParameter pbasis("basis", "Multiple", "otu-sequence", "otu", "", "", "",false,false); parameters.push_back(pbasis);
27 CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "",false,true); parameters.push_back(pcutoff);
28 CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pprobs);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
37 m->errorOut(e, "ClassifyOtuCommand", "setParameters");
41 //**********************************************************************************************************************
42 string ClassifyOtuCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The classify.otu command parameters are list, taxonomy, reftaxonomy, name, group, count, cutoff, label, basis and probs. The taxonomy and list parameters are required unless you have a valid current file.\n";
46 helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. Providing it will keep the rankIDs in the summary file static.\n";
47 helpString += "The name parameter allows you add a names file with your taxonomy file.\n";
48 helpString += "The group parameter allows you provide a group file to use in creating the summary file breakdown.\n";
49 helpString += "The count parameter allows you add a count file associated with your list file. When using the count parameter mothur assumes your list file contains only uniques.\n";
50 helpString += "The basis parameter allows you indicate what you want the summary file to represent, options are otu and sequence. Default is otu.\n";
51 helpString += "For example consider the following basis=sequence could give Clostridiales 3 105 16 43 46, where 105 is the total number of sequences whose otu classified to Clostridiales.\n";
52 helpString += "16 is the number of sequences in the otus from groupA, 43 is the number of sequences in the otus from groupB, and 46 is the number of sequences in the otus from groupC.\n";
53 helpString += "Now for basis=otu could give Clostridiales 3 7 6 1 2, where 7 is the number of otus that classified to Clostridiales.\n";
54 helpString += "6 is the number of otus containing sequences from groupA, 1 is the number of otus containing sequences from groupB, and 2 is the number of otus containing sequences from groupC.\n";
55 helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n";
56 helpString += "The default value for label is all labels in your inputfile.\n";
57 helpString += "The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy. The default is 51, meaning 51%. Cutoff cannot be below 51.\n";
58 helpString += "The probs parameter shuts off the outputting of the consensus confidence results. The default is true, meaning you want the confidence to be shown.\n";
59 helpString += "The classify.otu command should be in the following format: classify.otu(taxonomy=yourTaxonomyFile, list=yourListFile, name=yourNamesFile, label=yourLabels).\n";
60 helpString += "Example classify.otu(taxonomy=abrecovery.silva.full.taxonomy, list=abrecovery.fn.list, label=0.10).\n";
61 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n";
65 m->errorOut(e, "ClassifyOtuCommand", "getHelpString");
69 //**********************************************************************************************************************
70 string ClassifyOtuCommand::getOutputFileNameTag(string type, string inputName=""){
72 string outputFileName = "";
73 map<string, vector<string> >::iterator it;
75 //is this a type this command creates
76 it = outputTypes.find(type);
77 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
79 if (type == "constaxonomy") { outputFileName = "cons.taxonomy"; }
80 else if (type == "taxsummary") { outputFileName = "cons.tax.summary"; }
81 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
83 return outputFileName;
86 m->errorOut(e, "ClassifyOtuCommand", "getOutputFileNameTag");
90 //**********************************************************************************************************************
91 ClassifyOtuCommand::ClassifyOtuCommand(){
93 abort = true; calledHelp = true;
95 vector<string> tempOutNames;
96 outputTypes["constaxonomy"] = tempOutNames;
97 outputTypes["taxsummary"] = tempOutNames;
100 m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
105 //**********************************************************************************************************************
106 ClassifyOtuCommand::ClassifyOtuCommand(string option) {
108 abort = false; calledHelp = false;
112 //allow user to run help
113 if (option == "help") {
114 help(); abort = true; calledHelp = true;
115 }else if(option == "citation") { citation(); abort = true; calledHelp = true;}
117 vector<string> myArray = setParameters();
119 OptionParser parser(option);
120 map<string, string> parameters = parser.getParameters();
122 ValidParameters validParameter;
123 map<string, string>::iterator it;
125 //check to make sure all parameters are valid for command
126 for (it = parameters.begin(); it != parameters.end(); it++) {
127 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
130 //initialize outputTypes
131 vector<string> tempOutNames;
132 outputTypes["constaxonomy"] = tempOutNames;
133 outputTypes["taxsummary"] = tempOutNames;
135 //if the user changes the input directory command factory will send this info to us in the output parameter
136 string inputDir = validParameter.validFile(parameters, "inputdir", false);
137 if (inputDir == "not found"){ inputDir = ""; }
140 it = parameters.find("list");
141 //user has given a template file
142 if(it != parameters.end()){
143 path = m->hasPath(it->second);
144 //if the user has not given a path then, add inputdir. else leave path alone.
145 if (path == "") { parameters["list"] = inputDir + it->second; }
148 it = parameters.find("name");
149 //user has given a template file
150 if(it != parameters.end()){
151 path = m->hasPath(it->second);
152 //if the user has not given a path then, add inputdir. else leave path alone.
153 if (path == "") { parameters["name"] = inputDir + it->second; }
156 it = parameters.find("taxonomy");
157 //user has given a template file
158 if(it != parameters.end()){
159 path = m->hasPath(it->second);
160 //if the user has not given a path then, add inputdir. else leave path alone.
161 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
164 it = parameters.find("reftaxonomy");
165 //user has given a template file
166 if(it != parameters.end()){
167 path = m->hasPath(it->second);
168 //if the user has not given a path then, add inputdir. else leave path alone.
169 if (path == "") { parameters["reftaxonomy"] = inputDir + it->second; }
172 it = parameters.find("group");
173 //user has given a template file
174 if(it != parameters.end()){
175 path = m->hasPath(it->second);
176 //if the user has not given a path then, add inputdir. else leave path alone.
177 if (path == "") { parameters["group"] = inputDir + it->second; }
180 it = parameters.find("count");
181 //user has given a template file
182 if(it != parameters.end()){
183 path = m->hasPath(it->second);
184 //if the user has not given a path then, add inputdir. else leave path alone.
185 if (path == "") { parameters["count"] = inputDir + it->second; }
190 //if the user changes the output directory command factory will send this info to us in the output parameter
191 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
193 //check for required parameters
194 listfile = validParameter.validFile(parameters, "list", true);
195 if (listfile == "not found") {
196 //if there is a current list file, use it
197 listfile = m->getListFile();
198 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
199 else { m->mothurOut("You have no current listfile and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
201 else if (listfile == "not open") { abort = true; }
202 else { m->setListFile(listfile); }
204 taxfile = validParameter.validFile(parameters, "taxonomy", true);
205 if (taxfile == "not found") { //if there is a current list file, use it
206 taxfile = m->getTaxonomyFile();
207 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
208 else { m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
210 else if (taxfile == "not open") { abort = true; }
211 else { m->setTaxonomyFile(taxfile); }
213 refTaxonomy = validParameter.validFile(parameters, "reftaxonomy", true);
214 if (refTaxonomy == "not found") { refTaxonomy = ""; m->mothurOut("reftaxonomy is not required, but if given will keep the rankIDs in the summary file static."); m->mothurOutEndLine(); }
215 else if (refTaxonomy == "not open") { abort = true; }
217 namefile = validParameter.validFile(parameters, "name", true);
218 if (namefile == "not open") { namefile = ""; abort = true; }
219 else if (namefile == "not found") { namefile = ""; }
220 else { m->setNameFile(namefile); }
222 groupfile = validParameter.validFile(parameters, "group", true);
223 if (groupfile == "not open") { abort = true; }
224 else if (groupfile == "not found") { groupfile = ""; }
225 else { m->setGroupFile(groupfile); }
227 countfile = validParameter.validFile(parameters, "count", true);
228 if (countfile == "not open") { countfile = ""; abort = true; }
229 else if (countfile == "not found") { countfile = ""; }
230 else { m->setCountTableFile(countfile); }
232 if ((namefile != "") && (countfile != "")) {
233 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
236 if ((groupfile != "") && (countfile != "")) {
237 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
241 //check for optional parameter and set defaults
242 // ...at some point should added some additional type checking...
243 label = validParameter.validFile(parameters, "label", false);
244 if (label == "not found") { label = ""; allLines = 1; }
246 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
247 else { allLines = 1; }
250 basis = validParameter.validFile(parameters, "basis", false);
251 if (basis == "not found") { basis = "otu"; }
253 if ((basis != "otu") && (basis != "sequence")) { m->mothurOut("Invalid option for basis. basis options are otu and sequence, using otu."); m->mothurOutEndLine(); }
255 string temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "51"; }
256 m->mothurConvert(temp, cutoff);
258 temp = validParameter.validFile(parameters, "probs", false); if (temp == "not found"){ temp = "true"; }
259 probs = m->isTrue(temp);
261 temp = validParameter.validFile(parameters, "persample", false); if (temp == "not found"){ temp = "f"; }
262 persample = m->isTrue(temp);
264 if ((groupfile == "") && (countfile == "")) { if (persample) { m->mothurOut("persample is only valid with a group file, or count file with group information. Setting persample=f.\n"); persample = false; }
266 if (countfile != "") {
268 if (!cts.testGroups(countfile)) {
269 if (persample) { m->mothurOut("persample is only valid with a group file, or count file with group information. Setting persample=f.\n"); persample = false; }
273 if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true; }
275 if (countfile == "") {
277 vector<string> files; files.push_back(taxfile);
278 parser.getNameFile(files);
284 catch(exception& e) {
285 m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
289 //**********************************************************************************************************************
291 int ClassifyOtuCommand::execute(){
294 if (abort == true) { if (calledHelp) { return 0; } return 2; }
296 //if user gave a namesfile then use it
297 if (namefile != "") { m->readNames(namefile, nameMap, true); }
298 if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); groups = groupMap->getNamesOfGroups(); }
299 else { groupMap = NULL; }
300 if (countfile != "") { ct = new CountTable(); ct->readTable(countfile); if (ct->hasGroupInfo()) { groups = ct->getNamesOfGroups(); } }
303 //read taxonomy file and save in map for easy access in building bin trees
304 m->readTax(taxfile, taxMap);
306 if (m->control_pressed) { return 0; }
308 input = new InputData(listfile, "list");
309 list = input->getListVector();
310 string lastLabel = list->getLabel();
312 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
313 set<string> processedLabels;
314 set<string> userLabels = labels;
316 if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
318 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
320 if (allLines == 1 || labels.count(list->getLabel()) == 1){
322 m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
324 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete input; delete list; return 0; }
326 processedLabels.insert(list->getLabel());
327 userLabels.erase(list->getLabel());
330 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
331 string saveLabel = list->getLabel();
334 list = input->getListVector(lastLabel);
335 m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
339 if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete input; delete list; return 0; }
341 processedLabels.insert(list->getLabel());
342 userLabels.erase(list->getLabel());
344 //restore real lastlabel to save below
345 list->setLabel(saveLabel);
348 lastLabel = list->getLabel();
351 list = input->getListVector();
354 //output error messages about any remaining user labels
355 bool needToRun = false;
356 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {
357 m->mothurOut("Your file does not include the label " + (*it));
358 if (processedLabels.count(lastLabel) != 1) {
359 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
362 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
366 //run last label if you need to
367 if (needToRun == true) {
368 if (list != NULL) { delete list; }
369 list = input->getListVector(lastLabel);
370 m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
375 if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete input; delete list; return 0; }
379 if (groupMap != NULL) { delete groupMap; }
380 if (ct != NULL) { delete ct; }
382 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
384 m->mothurOutEndLine();
385 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
386 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
387 m->mothurOutEndLine();
391 catch(exception& e) {
392 m->errorOut(e, "ClassifyOtuCommand", "execute");
396 //**********************************************************************************************************************
397 vector<string> ClassifyOtuCommand::findConsensusTaxonomy(vector<string> names, int& size, string& conTax) {
400 vector<string> allNames;
401 map<string, string>::iterator it;
402 map<string, string>::iterator it2;
404 //create a tree containing sequences from this bin
405 PhyloTree* phylo = new PhyloTree();
408 for (int i = 0; i < names.size(); i++) {
410 //if namesfile include the names
411 if (namefile != "") {
413 //is this sequence in the name file - namemap maps seqName -> repSeqName
414 it2 = nameMap.find(names[i]);
416 if (it2 == nameMap.end()) { //this name is not in name file, skip it
417 m->mothurOut(names[i] + " is not in your name file. I will not include it in the consensus."); m->mothurOutEndLine();
420 //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
421 it = taxMap.find(it2->second);
423 if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
425 if (names[i] != it2->second) { m->mothurOut(names[i] + " is represented by " + it2->second + " and is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine(); }
426 else { m->mothurOut(names[i] + " is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine(); }
430 phylo->addSeqToTree(names[i], it->second);
432 allNames.push_back(names[i]);
437 //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
438 it = taxMap.find(names[i]);
440 if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
441 m->mothurOut(names[i] + " is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine();
443 if (countfile != "") {
444 int numDups = ct->getNumSeqs(names[i]);
445 for (int j = 0; j < numDups; j++) { phylo->addSeqToTree(names[i], it->second); }
449 phylo->addSeqToTree(names[i], it->second);
452 allNames.push_back(names[i]);
457 if (m->control_pressed) { delete phylo; return allNames; }
462 phylo->assignHeirarchyIDs(0);
464 TaxNode currentNode = phylo->get(0);
467 while (currentNode.children.size() != 0) { //you still have more to explore
470 int bestChildSize = 0;
472 //go through children
473 for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
475 TaxNode temp = phylo->get(itChild->second);
477 //select child with largest accesions - most seqs assigned to it
478 if (temp.accessions.size() > bestChildSize) {
479 bestChild = phylo->get(itChild->second);
480 bestChildSize = temp.accessions.size();
485 //phylotree adds an extra unknown so we want to remove that
486 if (bestChild.name == "unknown") { bestChildSize--; }
488 //is this taxonomy above cutoff
489 int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
491 if (consensusConfidence >= cutoff) { //if yes, add it
493 conTax += bestChild.name + "(" + toString(consensusConfidence) + ");";
495 conTax += bestChild.name + ";";
503 currentNode = bestChild;
506 if (myLevel != phylo->getMaxLevel()) {
507 while (myLevel != phylo->getMaxLevel()) {
508 conTax += "unclassified;";
512 if (conTax == "") { conTax = "no_consensus;"; }
519 catch(exception& e) {
520 m->errorOut(e, "ClassifyOtuCommand", "findConsensusTaxonomy");
525 //**********************************************************************************************************************
526 int ClassifyOtuCommand::process(ListVector* processList) {
532 if (outputDir == "") { outputDir += m->hasPath(listfile); }
535 string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." +getOutputFileNameTag("constaxonomy");
536 m->openOutputFile(outputFile, out);
537 outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
540 string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." +getOutputFileNameTag("taxsummary");
541 m->openOutputFile(outputSumFile, outSum);
542 outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile);
544 out << "OTU\tSize\tTaxonomy" << endl;
546 PhyloSummary* taxaSum;
547 if (countfile != "") {
548 if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, ct); }
549 else { taxaSum = new PhyloSummary(ct); }
551 if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, groupMap); }
552 else { taxaSum = new PhyloSummary(groupMap); }
555 vector<ofstream*> outSums;
556 vector<ofstream*> outs;
557 vector<PhyloSummary*> taxaSums;
558 map<string, int> groupIndex;
560 for (int i = 0; i < groups.size(); i++) {
561 groupIndex[groups[i]] = i;
562 ofstream* temp = new ofstream();
563 string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + groups[i] + "." +getOutputFileNameTag("constaxonomy");
564 m->openOutputFile(outputFile, *temp);
565 (*temp) << "OTU\tSize\tTaxonomy" << endl;
566 outs.push_back(temp);
567 outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
569 ofstream* tempSum = new ofstream();
570 string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + groups[i] + "." +getOutputFileNameTag("taxsummary");
571 m->openOutputFile(outputSumFile, *tempSum);
572 outSums.push_back(tempSum);
573 outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile);
575 PhyloSummary* taxaSumt;
576 if (countfile != "") {
577 if (refTaxonomy != "") { taxaSumt = new PhyloSummary(refTaxonomy, ct); }
578 else { taxaSumt = new PhyloSummary(ct); }
580 if (refTaxonomy != "") { taxaSumt = new PhyloSummary(refTaxonomy, groupMap); }
581 else { taxaSumt = new PhyloSummary(groupMap); }
583 taxaSums.push_back(taxaSumt);
587 //for each bin in the list vector
588 string snumBins = toString(processList->getNumBins());
589 for (int i = 0; i < processList->getNumBins(); i++) {
591 if (m->control_pressed) { break; }
593 vector<string> names;
594 string binnames = processList->get(i);
595 vector<string> thisNames;
596 m->splitAtComma(binnames, thisNames);
598 names = findConsensusTaxonomy(thisNames, size, conTax);
600 if (m->control_pressed) { break; }
602 //output to new names file
603 string binLabel = "Otu";
604 string sbinNumber = toString(i+1);
605 if (sbinNumber.length() < snumBins.length()) {
606 int diff = snumBins.length() - sbinNumber.length();
607 for (int h = 0; h < diff; h++) { binLabel += "0"; }
609 binLabel += sbinNumber;
611 out << binLabel << '\t' << size << '\t' << conTax << endl;
613 string noConfidenceConTax = conTax;
614 m->removeConfidences(noConfidenceConTax);
616 //add this bins taxonomy to summary
617 if (basis == "sequence") {
618 for(int j = 0; j < names.size(); j++) {
620 if (countfile != "") { numReps = ct->getNumSeqs(names[j]); }
621 for(int k = 0; k < numReps; k++) { taxaSum->addSeqToTree(names[j], noConfidenceConTax); }
624 map<string, bool> containsGroup;
625 if (countfile != "") {
626 if (ct->hasGroupInfo()) {
627 vector<string> mGroups = ct->getNamesOfGroups();
628 for (int k = 0; k < names.size(); k++) {
629 vector<int> counts = ct->getGroupCounts(names[k]);
630 for (int h = 0; h < counts.size(); h++) {
631 if (counts[h] != 0) { containsGroup[mGroups[h]] = true; }
636 if (groupfile != "") {
637 vector<string> mGroups = groupMap->getNamesOfGroups();
638 for (int j = 0; j < mGroups.size(); j++) { containsGroup[mGroups[j]] = false; }
640 for (int k = 0; k < names.size(); k++) {
641 //find out the sequences group
642 string group = groupMap->getGroup(names[k]);
644 if (group == "not found") { m->mothurOut("[WARNING]: " + names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); }
646 containsGroup[group] = true;
651 taxaSum->addSeqToTree(noConfidenceConTax, containsGroup);
656 //divide names by group
657 map<string, vector<string> > parsedNames;
658 map<string, vector<string> >::iterator itParsed;
660 //parse names by group
661 for (int j = 0; j < names.size(); j++) {
662 if (groupfile != "") {
663 string group = groupMap->getGroup(names[j]);
664 itParsed = parsedNames.find(group);
666 if (itParsed != parsedNames.end()) { itParsed->second.push_back(names[j]); }
667 else { vector<string> tempNames; tempNames.push_back(names[j]); parsedNames[group] = tempNames; }
668 }else { //count file was used
669 vector<string> thisSeqsGroups = ct->getGroups(names[j]);
670 for (int k = 0; k < thisSeqsGroups.size(); k++) {
671 string group = thisSeqsGroups[k];
672 itParsed = parsedNames.find(group);
674 if (itParsed != parsedNames.end()) { itParsed->second.push_back(names[j]); }
675 else { vector<string> tempNames; tempNames.push_back(names[j]); parsedNames[group] = tempNames; }
680 for (itParsed = parsedNames.begin(); itParsed != parsedNames.end(); itParsed++) {
681 vector<string> theseNames = findConsensusTaxonomy(itParsed->second, size, conTax);
683 if (m->control_pressed) { break; }
685 //output to new names file
686 string binLabel = "Otu";
687 string sbinNumber = toString(i+1);
688 if (sbinNumber.length() < snumBins.length()) {
689 int diff = snumBins.length() - sbinNumber.length();
690 for (int h = 0; h < diff; h++) { binLabel += "0"; }
692 binLabel += sbinNumber;
694 (*outs[groupIndex[itParsed->first]]) << binLabel << '\t' << size << '\t' << conTax << endl;
696 string noConfidenceConTax = conTax;
697 m->removeConfidences(noConfidenceConTax);
699 //add this bins taxonomy to summary
700 if (basis == "sequence") {
701 for(int j = 0; j < theseNames.size(); j++) {
703 if (countfile != "") { numReps = ct->getGroupCount(theseNames[j], itParsed->first); } //get num seqs for this seq from this group
704 for(int k = 0; k < numReps; k++) { (taxaSums[groupIndex[itParsed->first]])->addSeqToTree(theseNames[j], noConfidenceConTax); }
707 map<string, bool> containsGroup;
708 containsGroup[itParsed->first] = true;
709 (taxaSums[groupIndex[itParsed->first]])->addSeqToTree(noConfidenceConTax, containsGroup);
718 taxaSum->print(outSum);
722 for (int i = 0; i < groups.size(); i++) {
724 taxaSums[i]->print(*outSums[i]);
725 (*outSums[i]).close();
737 catch(exception& e) {
738 m->errorOut(e, "ClassifyOtuCommand", "process");
742 /**************************************************************************************************/
743 string ClassifyOtuCommand::addUnclassifieds(string tax, int maxlevel) {
745 string newTax, taxon;
748 //keep what you have counting the levels
749 while (tax.find_first_of(';') != -1) {
751 taxon = tax.substr(0,tax.find_first_of(';'))+';';
752 tax = tax.substr(tax.find_first_of(';')+1, tax.length());
757 //add "unclassified" until you reach maxLevel
758 while (level < maxlevel) {
759 newTax += "unclassified;";
765 catch(exception& e) {
766 m->errorOut(e, "ClassifyOtuCommand", "addUnclassifieds");
770 //**********************************************************************************************************************