/**************************************************************************************************/
string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
try {
-
- //taxConfidenceScore.clear(); //clear out previous seqs scores
- vector< map<string, int> > confidenceScores; //you need the added vector level of confusion to account for the level that name is seen since they can be the same
- //map of classification to confidence for all areas seen
- //ie. Bacteria;Alphaproteobacteria;Rhizobiales;Azorhizobium_et_rel.;Methylobacterium_et_rel.;Bosea;
- //ie. Bacteria -> 100, Alphaproteobacteria -> 100, Rhizobiales -> 87, Azorhizobium_et_rel. -> 78, Methylobacterium_et_rel. -> 70, Bosea -> 50
- confidenceScores.resize(100); //if you have more than 100 levels of classification...
-
- map<string, int>::iterator itBoot;
- map<string, int>::iterator itBoot2;
+ map<int, int> confidenceScores;
+
+ map<int, int>::iterator itBoot;
+ map<int, int>::iterator itBoot2;
map<int, int>::iterator itConvert;
for (int i = 0; i < iters; i++) {
//add to confidence results
while (taxonomy.level != 0) { //while you are not at the root
- itBoot2 = confidenceScores[taxonomy.level].find(taxonomy.name); //is this a classification we already have a count on
+ itBoot2 = confidenceScores.find(newTax); //is this a classification we already have a count on
- if (itBoot2 == confidenceScores[taxonomy.level].end()) { //not already in confidence scores
- confidenceScores[taxonomy.level][taxonomy.name] = 1;
+ if (itBoot2 == confidenceScores.end()) { //not already in confidence scores
+ confidenceScores[newTax] = 1;
}else{
- confidenceScores[taxonomy.level][taxonomy.name]++;
+ confidenceScores[newTax]++;
}
-
+
+ newTax = taxonomy.parent;
taxonomy = phyloTree->get(taxonomy.parent);
}
string confidenceTax = "";
simpleTax = "";
+
+ int seqTaxIndex = tax;
TaxNode seqTax = phyloTree->get(tax);
while (seqTax.level != 0) { //while you are not at the root
- itBoot2 = confidenceScores[seqTax.level].find(seqTax.name); //is this a classification we already have a count on
+ itBoot2 = confidenceScores.find(seqTaxIndex); //is this a classification we already have a count on
int confidence = 0;
- if (itBoot2 != confidenceScores[seqTax.level].end()) { //not already in confidence scores
- confidence = confidenceScores[seqTax.level][seqTax.name];
+ if (itBoot2 != confidenceScores.end()) { //already in confidence scores
+ confidence = confidenceScores[seqTaxIndex];
}
if (((confidence/(float)iters) * 100) >= confidenceThreshold) {
simpleTax = seqTax.name + ";" + simpleTax;
}
+ seqTaxIndex = seqTax.parent;
seqTax = phyloTree->get(seqTax.parent);
}
}
else if (templateFileName == "not open") { abort = true; }
- groupfile = validParameter.validFile(parameters, "group", false);
- if (groupfile == "not found") { groupfile = ""; }
- else {
- splitAtDash(groupfile, groupfileNames);
-
- //go through files and make sure they are good, if not, then disregard them
- for (int i = 0; i < groupfileNames.size(); i++) {
- if (inputDir != "") {
- string path = hasPath(groupfileNames[i]);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { groupfileNames[i] = inputDir + groupfileNames[i]; }
- }
- int ableToOpen;
-
- #ifdef USE_MPI
- int pid;
- MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
- MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
-
- if (pid == 0) {
- #endif
-
- ifstream in;
- ableToOpen = openInputFile(groupfileNames[i], in);
- in.close();
-
- #ifdef USE_MPI
- for (int j = 1; j < processors; j++) {
- MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD);
- }
- }else{
- MPI_Status status;
- MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
- }
- #endif
- if (ableToOpen == 1) { m->mothurOut("Unable to match group file with fasta file."); m->mothurOutEndLine(); abort = true; }
-
- }
- }
-
fastaFileName = validParameter.validFile(parameters, "fasta", false);
if (fastaFileName == "not found") { m->mothurOut("fasta is a required parameter for the classify.seqs command."); m->mothurOutEndLine(); abort = true; }
else {
if (namefileNames.size() != fastaFileNames.size()) { abort = true; m->mothurOut("If you provide a name file, you must have one for each fasta file."); m->mothurOutEndLine(); }
}
+ groupfile = validParameter.validFile(parameters, "group", false);
+ if (groupfile == "not found") { groupfile = ""; }
+ else {
+ splitAtDash(groupfile, groupfileNames);
+
+ //go through files and make sure they are good, if not, then disregard them
+ for (int i = 0; i < groupfileNames.size(); i++) {
+ if (inputDir != "") {
+ string path = hasPath(groupfileNames[i]);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { groupfileNames[i] = inputDir + groupfileNames[i]; }
+ }
+ int ableToOpen;
+
+ #ifdef USE_MPI
+ int pid;
+ MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+
+ if (pid == 0) {
+ #endif
+
+ ifstream in;
+ ableToOpen = openInputFile(groupfileNames[i], in);
+ in.close();
+
+ #ifdef USE_MPI
+ for (int j = 1; j < processors; j++) {
+ MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD);
+ }
+ }else{
+ MPI_Status status;
+ MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+ }
+
+ #endif
+ if (ableToOpen == 1) { m->mothurOut("Unable to match group file with fasta file."); m->mothurOutEndLine(); abort = true; }
+
+ }
+ }
+
if (groupfile != "") {
if (groupfileNames.size() != fastaFileNames.size()) { abort = true; m->mothurOut("If you provide a group file, you must have one for each fasta file."); m->mothurOutEndLine(); }
+ }else {
+ for (int i = 0; i < fastaFileNames.size(); i++) { groupfileNames.push_back(""); }
}
//check for optional parameter and set defaults