From: westcott Date: Fri, 7 May 2010 17:59:09 +0000 (+0000) Subject: changed confidence scores calculation in bayesian X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=7a7870ab773b993d8d1fd89703b1df3beb47f8d4 changed confidence scores calculation in bayesian --- diff --git a/bayesian.cpp b/bayesian.cpp index 00467ff..a4229c6 100644 --- a/bayesian.cpp +++ b/bayesian.cpp @@ -216,17 +216,11 @@ string Bayesian::getTaxonomy(Sequence* seq) { /**************************************************************************************************/ string Bayesian::bootstrapResults(vector kmers, int tax, int numToSelect) { try { - - //taxConfidenceScore.clear(); //clear out previous seqs scores - vector< map > confidenceScores; //you need the added vector level of confusion to account for the level that name is seen since they can be the same - //map of classification to confidence for all areas seen - //ie. Bacteria;Alphaproteobacteria;Rhizobiales;Azorhizobium_et_rel.;Methylobacterium_et_rel.;Bosea; - //ie. Bacteria -> 100, Alphaproteobacteria -> 100, Rhizobiales -> 87, Azorhizobium_et_rel. -> 78, Methylobacterium_et_rel. -> 70, Bosea -> 50 - confidenceScores.resize(100); //if you have more than 100 levels of classification... - - map::iterator itBoot; - map::iterator itBoot2; + map confidenceScores; + + map::iterator itBoot; + map::iterator itBoot2; map::iterator itConvert; for (int i = 0; i < iters; i++) { @@ -248,14 +242,15 @@ string Bayesian::bootstrapResults(vector kmers, int tax, int numToSelect) { //add to confidence results while (taxonomy.level != 0) { //while you are not at the root - itBoot2 = confidenceScores[taxonomy.level].find(taxonomy.name); //is this a classification we already have a count on + itBoot2 = confidenceScores.find(newTax); //is this a classification we already have a count on - if (itBoot2 == confidenceScores[taxonomy.level].end()) { //not already in confidence scores - confidenceScores[taxonomy.level][taxonomy.name] = 1; + if (itBoot2 == confidenceScores.end()) { //not already in confidence scores + confidenceScores[newTax] = 1; }else{ - confidenceScores[taxonomy.level][taxonomy.name]++; + confidenceScores[newTax]++; } - + + newTax = taxonomy.parent; taxonomy = phyloTree->get(taxonomy.parent); } @@ -263,15 +258,17 @@ string Bayesian::bootstrapResults(vector kmers, int tax, int numToSelect) { string confidenceTax = ""; simpleTax = ""; + + int seqTaxIndex = tax; TaxNode seqTax = phyloTree->get(tax); while (seqTax.level != 0) { //while you are not at the root - itBoot2 = confidenceScores[seqTax.level].find(seqTax.name); //is this a classification we already have a count on + itBoot2 = confidenceScores.find(seqTaxIndex); //is this a classification we already have a count on int confidence = 0; - if (itBoot2 != confidenceScores[seqTax.level].end()) { //not already in confidence scores - confidence = confidenceScores[seqTax.level][seqTax.name]; + if (itBoot2 != confidenceScores.end()) { //already in confidence scores + confidence = confidenceScores[seqTaxIndex]; } if (((confidence/(float)iters) * 100) >= confidenceThreshold) { @@ -279,6 +276,7 @@ string Bayesian::bootstrapResults(vector kmers, int tax, int numToSelect) { simpleTax = seqTax.name + ";" + simpleTax; } + seqTaxIndex = seqTax.parent; seqTax = phyloTree->get(seqTax.parent); } diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp index bdf059c..580dd9b 100644 --- a/classifyseqscommand.cpp +++ b/classifyseqscommand.cpp @@ -82,47 +82,7 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option) { } else if (templateFileName == "not open") { abort = true; } - groupfile = validParameter.validFile(parameters, "group", false); - if (groupfile == "not found") { groupfile = ""; } - else { - splitAtDash(groupfile, groupfileNames); - - //go through files and make sure they are good, if not, then disregard them - for (int i = 0; i < groupfileNames.size(); i++) { - if (inputDir != "") { - string path = hasPath(groupfileNames[i]); - //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { groupfileNames[i] = inputDir + groupfileNames[i]; } - } - int ableToOpen; - - #ifdef USE_MPI - int pid; - MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running - MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - - if (pid == 0) { - #endif - - ifstream in; - ableToOpen = openInputFile(groupfileNames[i], in); - in.close(); - - #ifdef USE_MPI - for (int j = 1; j < processors; j++) { - MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); - } - }else{ - MPI_Status status; - MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); - } - #endif - if (ableToOpen == 1) { m->mothurOut("Unable to match group file with fasta file."); m->mothurOutEndLine(); abort = true; } - - } - } - fastaFileName = validParameter.validFile(parameters, "fasta", false); if (fastaFileName == "not found") { m->mothurOut("fasta is a required parameter for the classify.seqs command."); m->mothurOutEndLine(); abort = true; } else { @@ -230,8 +190,51 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option) { if (namefileNames.size() != fastaFileNames.size()) { abort = true; m->mothurOut("If you provide a name file, you must have one for each fasta file."); m->mothurOutEndLine(); } } + groupfile = validParameter.validFile(parameters, "group", false); + if (groupfile == "not found") { groupfile = ""; } + else { + splitAtDash(groupfile, groupfileNames); + + //go through files and make sure they are good, if not, then disregard them + for (int i = 0; i < groupfileNames.size(); i++) { + if (inputDir != "") { + string path = hasPath(groupfileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { groupfileNames[i] = inputDir + groupfileNames[i]; } + } + int ableToOpen; + + #ifdef USE_MPI + int pid; + MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + if (pid == 0) { + #endif + + ifstream in; + ableToOpen = openInputFile(groupfileNames[i], in); + in.close(); + + #ifdef USE_MPI + for (int j = 1; j < processors; j++) { + MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); + } + }else{ + MPI_Status status; + MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); + } + + #endif + if (ableToOpen == 1) { m->mothurOut("Unable to match group file with fasta file."); m->mothurOutEndLine(); abort = true; } + + } + } + if (groupfile != "") { if (groupfileNames.size() != fastaFileNames.size()) { abort = true; m->mothurOut("If you provide a group file, you must have one for each fasta file."); m->mothurOutEndLine(); } + }else { + for (int i = 0; i < fastaFileNames.size(); i++) { groupfileNames.push_back(""); } } //check for optional parameter and set defaults