]> git.donarmstrong.com Git - mothur.git/blobdiff - bayesian.cpp
fixed cluster.split with average method
[mothur.git] / bayesian.cpp
index 22eab72b60574679f61eca1f02990a3178aecd75..b46f7703a971b8b5141d42faf14e26d45037cba0 100644 (file)
@@ -17,9 +17,11 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
        try {
                                        
                /************calculate the probablity that each word will be in a specific taxonomy*************/
-               string phyloTreeName = tfile.substr(0,tfile.find_last_of(".")+1) + "tree.train";
-               string probFileName = tfile.substr(0,tfile.find_last_of(".")+1) + tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer.prob";
-               string probFileName2 = tfile.substr(0,tfile.find_last_of(".")+1) + tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer.numNonZero";
+               string tfileroot = tfile.substr(0,tfile.find_last_of(".")+1);
+               string tempfileroot = getRootName(getSimpleName(tempFile));
+               string phyloTreeName = tfileroot + "tree.train";
+               string probFileName = tfileroot + tempfileroot + char('0'+ kmerSize) + "mer.prob";
+               string probFileName2 = tfileroot + tempfileroot + char('0'+ kmerSize) + "mer.numNonZero";
                
                ofstream out;
                ofstream out2;
@@ -213,7 +215,7 @@ string Bayesian::getTaxonomy(Sequence* seq) {
                int index = getMostProbableTaxonomy(queryKmers);
                
                if (m->control_pressed) { return tax; }
-                                       
+//cout << seq->getName() << '\t' << index << endl;                                     
                //bootstrap - to set confidenceScore
                int numToSelect = queryKmers.size() / 8;
                tax = bootstrapResults(queryKmers, index, numToSelect);
@@ -249,10 +251,10 @@ string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
                        
                        //get taxonomy
                        int newTax = getMostProbableTaxonomy(temp);
-                       TaxNode taxonomy = phyloTree->get(newTax);
-                       
+                       TaxNode taxonomyTemp = phyloTree->get(newTax);
+       
                        //add to confidence results
-                       while (taxonomy.level != 0) { //while you are not at the root
+                       while (taxonomyTemp.level != 0) { //while you are not at the root
                                
                                itBoot2 = confidenceScores.find(newTax); //is this a classification we already have a count on
                                
@@ -262,8 +264,8 @@ string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
                                        confidenceScores[newTax]++;
                                }
                                
-                               newTax = taxonomy.parent;
-                               taxonomy = phyloTree->get(taxonomy.parent);
+                               newTax = taxonomyTemp.parent;
+                               taxonomyTemp = phyloTree->get(newTax);
                        }
        
                }
@@ -362,7 +364,7 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
                
                #ifdef USE_MPI
                        
-                       int pid, num, num2;
+                       int pid, num, num2, processors;
                        vector<long> positions;
                        vector<long> positions2;
                        
@@ -370,6 +372,8 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
                        MPI_File inMPI;
                        MPI_File inMPI2;
                        MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                       MPI_Comm_size(MPI_COMM_WORLD, &processors);
+                       int tag = 2001;
 
                        char inFileName[1024];
                        strcpy(inFileName, inNumName.c_str());
@@ -382,26 +386,24 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
 
                        if (pid == 0) {
                                positions = setFilePosEachLine(inNumName, num);
-                               
-                               //send file positions to all processes
-                               MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                               MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos 
-                               
                                positions2 = setFilePosEachLine(inName, num2);
                                
-                               //send file positions to all processes
-                               MPI_Bcast(&num2, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                               MPI_Bcast(&positions2[0], (num2+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos       
+                               for(int i = 1; i < processors; i++) { 
+                                       MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&positions[0], (num+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                                       
+                                       MPI_Send(&num2, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&positions2[0], (num2+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                               }
 
                        }else{
-                               MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
-                               positions.resize(num);
-                               MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                               MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                               positions.resize(num+1);
+                               MPI_Recv(&positions[0], (num+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
                                
-                               MPI_Bcast(&num2, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
-                               positions2.resize(num2);
-                               MPI_Bcast(&positions2[0], (num2+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
-
+                               MPI_Recv(&num2, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                               positions2.resize(num2+1);
+                               MPI_Recv(&positions2[0], (num2+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
                        }
                
                        //read numKmers
@@ -473,6 +475,7 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
                                
                        }
                        MPI_File_close(&inMPI2);
+                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                #else
                
                        in >> numKmers; gobble(in);