]> git.donarmstrong.com Git - mothur.git/commitdiff
changed confidence scores calculation in bayesian
authorwestcott <westcott>
Fri, 7 May 2010 17:59:09 +0000 (17:59 +0000)
committerwestcott <westcott>
Fri, 7 May 2010 17:59:09 +0000 (17:59 +0000)
bayesian.cpp
classifyseqscommand.cpp

index 00467ff0feb0a5df82314685520943376582278e..a4229c6b3d331ca5a04ee49ac2385b2965963421 100644 (file)
@@ -216,17 +216,11 @@ string Bayesian::getTaxonomy(Sequence* seq) {
 /**************************************************************************************************/
 string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
        try {
-               
-               //taxConfidenceScore.clear(); //clear out previous seqs scores
                                
-               vector< map<string, int> > confidenceScores; //you need the added vector level of confusion to account for the level that name is seen since they can be the same
-                                                                               //map of classification to confidence for all areas seen
-                                                                          //ie. Bacteria;Alphaproteobacteria;Rhizobiales;Azorhizobium_et_rel.;Methylobacterium_et_rel.;Bosea;
-                                                                          //ie. Bacteria -> 100, Alphaproteobacteria -> 100, Rhizobiales -> 87, Azorhizobium_et_rel. -> 78, Methylobacterium_et_rel. -> 70, Bosea -> 50
-               confidenceScores.resize(100);  //if you have more than 100 levels of classification...
-               
-               map<string, int>::iterator itBoot;
-               map<string, int>::iterator itBoot2;
+               map<int, int> confidenceScores; 
+                               
+               map<int, int>::iterator itBoot;
+               map<int, int>::iterator itBoot2;
                map<int, int>::iterator itConvert;
                
                for (int i = 0; i < iters; i++) {
@@ -248,14 +242,15 @@ string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
                        //add to confidence results
                        while (taxonomy.level != 0) { //while you are not at the root
                                
-                               itBoot2 = confidenceScores[taxonomy.level].find(taxonomy.name); //is this a classification we already have a count on
+                               itBoot2 = confidenceScores.find(newTax); //is this a classification we already have a count on
                                
-                               if (itBoot2 == confidenceScores[taxonomy.level].end()) { //not already in confidence scores
-                                       confidenceScores[taxonomy.level][taxonomy.name] = 1;
+                               if (itBoot2 == confidenceScores.end()) { //not already in confidence scores
+                                       confidenceScores[newTax] = 1;
                                }else{
-                                       confidenceScores[taxonomy.level][taxonomy.name]++;
+                                       confidenceScores[newTax]++;
                                }
-               
+                               
+                               newTax = taxonomy.parent;
                                taxonomy = phyloTree->get(taxonomy.parent);
                        }
        
@@ -263,15 +258,17 @@ string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
                
                string confidenceTax = "";
                simpleTax = "";
+               
+               int seqTaxIndex = tax;
                TaxNode seqTax = phyloTree->get(tax);
                
                while (seqTax.level != 0) { //while you are not at the root
                                        
-                               itBoot2 = confidenceScores[seqTax.level].find(seqTax.name); //is this a classification we already have a count on
+                               itBoot2 = confidenceScores.find(seqTaxIndex); //is this a classification we already have a count on
                                
                                int confidence = 0;
-                               if (itBoot2 != confidenceScores[seqTax.level].end()) { //not already in confidence scores
-                                       confidence = confidenceScores[seqTax.level][seqTax.name];
+                               if (itBoot2 != confidenceScores.end()) { //already in confidence scores
+                                       confidence = confidenceScores[seqTaxIndex];
                                }
                                
                                if (((confidence/(float)iters) * 100) >= confidenceThreshold) {
@@ -279,6 +276,7 @@ string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
                                        simpleTax = seqTax.name + ";" + simpleTax;
                                }
                                
+                               seqTaxIndex = seqTax.parent;
                                seqTax = phyloTree->get(seqTax.parent);
                }
                
index bdf059c6a063aadfa2256cfe2368d12c7b69bc0e..580dd9b6e056197bfc48cb39abcfb526237e1487 100644 (file)
@@ -82,47 +82,7 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                        }
                        else if (templateFileName == "not open") { abort = true; }      
                        
-                       groupfile = validParameter.validFile(parameters, "group", false);
-                       if (groupfile == "not found") { groupfile = "";  }
-                       else { 
-                               splitAtDash(groupfile, groupfileNames);
-                               
-                               //go through files and make sure they are good, if not, then disregard them
-                               for (int i = 0; i < groupfileNames.size(); i++) {
-                                       if (inputDir != "") {
-                                               string path = hasPath(groupfileNames[i]);
-                                               //if the user has not given a path then, add inputdir. else leave path alone.
-                                               if (path == "") {       groupfileNames[i] = inputDir + groupfileNames[i];               }
-                                       }
-                                       int ableToOpen;
-                                       
-                                       #ifdef USE_MPI  
-                                               int pid;
-                                               MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
-                                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
-                               
-                                               if (pid == 0) {
-                                       #endif
-
-                                       ifstream in;
-                                       ableToOpen = openInputFile(groupfileNames[i], in);
-                                       in.close();
-                                       
-                                       #ifdef USE_MPI  
-                                                       for (int j = 1; j < processors; j++) {
-                                                               MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); 
-                                                       }
-                                               }else{
-                                                       MPI_Status status;
-                                                       MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
-                                               }
                                                
-                                       #endif
-                                       if (ableToOpen == 1) {  m->mothurOut("Unable to match group file with fasta file."); m->mothurOutEndLine(); abort = true;       }
-                                       
-                               }
-                       }
-                       
                        fastaFileName = validParameter.validFile(parameters, "fasta", false);
                        if (fastaFileName == "not found") { m->mothurOut("fasta is a required parameter for the classify.seqs command."); m->mothurOutEndLine(); abort = true;  }
                        else { 
@@ -230,8 +190,51 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                                if (namefileNames.size() != fastaFileNames.size()) { abort = true; m->mothurOut("If you provide a name file, you must have one for each fasta file."); m->mothurOutEndLine(); }
                        }
                        
+                       groupfile = validParameter.validFile(parameters, "group", false);
+                       if (groupfile == "not found") { groupfile = "";  }
+                       else { 
+                               splitAtDash(groupfile, groupfileNames);
+                               
+                               //go through files and make sure they are good, if not, then disregard them
+                               for (int i = 0; i < groupfileNames.size(); i++) {
+                                       if (inputDir != "") {
+                                               string path = hasPath(groupfileNames[i]);
+                                               //if the user has not given a path then, add inputdir. else leave path alone.
+                                               if (path == "") {       groupfileNames[i] = inputDir + groupfileNames[i];               }
+                                       }
+                                       int ableToOpen;
+                                       
+                                       #ifdef USE_MPI  
+                                               int pid;
+                                               MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
+                                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                               
+                                               if (pid == 0) {
+                                       #endif
+
+                                       ifstream in;
+                                       ableToOpen = openInputFile(groupfileNames[i], in);
+                                       in.close();
+                                       
+                                       #ifdef USE_MPI  
+                                                       for (int j = 1; j < processors; j++) {
+                                                               MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); 
+                                                       }
+                                               }else{
+                                                       MPI_Status status;
+                                                       MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+                                               }
+                                               
+                                       #endif
+                                       if (ableToOpen == 1) {  m->mothurOut("Unable to match group file with fasta file."); m->mothurOutEndLine(); abort = true;       }
+                                       
+                               }
+                       }
+
                        if (groupfile != "") {
                                if (groupfileNames.size() != fastaFileNames.size()) { abort = true; m->mothurOut("If you provide a group file, you must have one for each fasta file."); m->mothurOutEndLine(); }
+                       }else {
+                               for (int i = 0; i < fastaFileNames.size(); i++) {  groupfileNames.push_back("");  }
                        }
                        
                        //check for optional parameter and set defaults