git.donarmstrong.com Git - mothur.git/blobdiff - classify.cpp
added modify names parameter to set.dir
[mothur.git] / classify.cpp
index 212e563f94c4ae7af3ba0916d1040afd9b0e11fa..36179f471da4ac5d40b563e609feb2f8d3d32e6c 100644 (file)
@@ -23,7 +23,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                if (tfile == "saved") { tfile = rdb->getSavedTaxonomy(); }
                
                taxFile = tfile;
-               readTaxonomy(taxFile);  
+               
                int numSeqs = 0;
                
                if (tempFile == "saved") {
@@ -61,7 +61,8 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                                        names.push_back(temp.getName());
                                        database->addSequence(temp);    
                                }
-                               database->generateDB();
+                               if ((method == "kmer") && (!shortcuts)) {;} //don't print
+                else {database->generateDB(); }
                        }else if ((method == "kmer") && (!needToGenerate)) {    
                                ifstream kmerFileTest(kmerDBName.c_str());
                                database->readKmerDB(kmerFileTest);     
@@ -73,11 +74,6 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                        
                        database->setNumSeqs(numSeqs);
                        
-                       //sanity check
-                       bool okay = phyloTree->ErrorCheck(names);
-                       
-                       if (!okay) { m->control_pressed = true; }
-                       
                        m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences and generate the search databases.");m->mothurOutEndLine();  
                        
                }else {
@@ -200,7 +196,8 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                                }
                                fastaFile.close();
 
-                               database->generateDB();
+                if ((method == "kmer") && (!shortcuts)) {;} //don't print
+                else {database->generateDB(); } 
                                
                        }else if ((method == "kmer") && (!needToGenerate)) {    
                                ifstream kmerFileTest(kmerDBName.c_str());
@@ -219,18 +216,19 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                                fastaFile.close();
                        }
        #endif  
-               
+               
                        database->setNumSeqs(names.size());
                        
-                       //sanity check
-                       bool okay = phyloTree->ErrorCheck(names);
-                       
-                       if (!okay) { m->control_pressed = true; }
-                       
                        m->mothurOut("DONE."); m->mothurOutEndLine();
                        m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine();
                }
-
+        
+        readTaxonomy(taxFile);
+        
+        //sanity check
+        bool okay = phyloTree->ErrorCheck(names);
+        
+        if (!okay) { m->control_pressed = true; }
        }
        catch(exception& e) {
                m->errorOut(e, "Classify", "generateDatabaseAndNames");
@@ -238,7 +236,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
        }
 }
 /**************************************************************************************************/
-Classify::Classify() {         m = MothurOut::getInstance();   database = NULL;        flipped=false; }
+// Default constructor: stores the result of MothurOut::getInstance() in m and
+// starts with no search database, no phylo tree, and the flipped flag cleared.
+Classify::Classify() {
+       m = MothurOut::getInstance();
+       database = NULL;
+       phyloTree = NULL;
+       flipped = false;
+}
 /**************************************************************************************************/
 
 int Classify::readTaxonomy(string file) {
@@ -260,9 +258,6 @@ int Classify::readTaxonomy(string file) {
                MPI_File inMPI;
                MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
                MPI_Comm_size(MPI_COMM_WORLD, &processors);
-
-               //char* inFileName = new char[file.length()];
-               //memcpy(inFileName, file.c_str(), file.length());
                
                char inFileName[1024];
                strcpy(inFileName, file.c_str());
@@ -300,9 +295,13 @@ int Classify::readTaxonomy(string file) {
                        iss >> name; m->gobble(iss);
             iss >> taxInfo;
             if (m->debug) { m->mothurOut("[DEBUG]: name = " + name + " tax = " + taxInfo + "\n"); }
-                       taxonomy[name] = taxInfo;
-                       phyloTree->addSeqToTree(name, taxInfo);
-               }
+                       if (m->inUsersGroups(name, names)) {
+                taxonomy[name] = taxInfo;
+                phyloTree->addSeqToTree(name, taxInfo);
+            }else {
+                m->mothurOut("[WARNING]: " + name + " is in your taxonomy file and not in your reference file, ignoring.\n");
+            }          
+        }
                
                MPI_File_close(&inMPI);
                MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
@@ -310,7 +309,16 @@ int Classify::readTaxonomy(string file) {
         
         taxonomy.clear(); 
         m->readTax(file, taxonomy);
-        for (map<string, string>::iterator itTax = taxonomy.begin(); itTax != taxonomy.end(); itTax++) {  phyloTree->addSeqToTree(itTax->first, itTax->second);  }
+        map<string, string> tempTaxonomy;
+        for (map<string, string>::iterator itTax = taxonomy.begin(); itTax != taxonomy.end(); itTax++) {  
+            if (m->inUsersGroups(itTax->first, names)) {
+                phyloTree->addSeqToTree(itTax->first, itTax->second); 
+                tempTaxonomy[itTax->first] = itTax->second;
+            }else {
+                m->mothurOut("[WARNING]: " + itTax->first + " is in your taxonomy file and not in your reference file, ignoring.\n");
+            }
+        }
+        taxonomy = tempTaxonomy;
 #endif 
                phyloTree->assignHeirarchyIDs(0);
                
@@ -355,3 +363,37 @@ vector<string> Classify::parseTax(string tax) {
 }
 /**************************************************************************************************/
 
+/**
+ * Computes log(sum_i exp(probabilities[i])) using the log-sum-exp shift: the largest
+ * entry is subtracted before exponentiating and added back after the log, so exp()
+ * is only ever called on arguments <= 0. See http://jblevins.org/notes/log-sum-exp
+ *
+ * @param probabilities  values to combine (taken by value, matching the declaration).
+ * @param maxIndex       output: index of the largest entry; when several entries tie,
+ *                       the last one wins. Set to -1 when probabilities is empty.
+ * @return the log of the summed exponentials. An empty vector, or one whose largest
+ *         entry is -HUGE_VAL (negative infinity on IEEE platforms), yields -HUGE_VAL,
+ *         i.e. the log of a zero sum.
+ */
+double Classify::getLogExpSum(vector<double> probabilities, int& maxIndex){
+       try {
+        // An empty vector has no element 0 to seed the maximum with; reading it would be
+        // undefined behaviour, so report "no maximum" and the log of an empty sum.
+        if (probabilities.empty()) {
+            maxIndex = -1;
+            return -HUGE_VAL;
+        }
+        
+        double maxProb = probabilities[0];
+        maxIndex = 0;
+        
+        int numProbs = (int)probabilities.size();
+        
+        // >= (not >) so that the last of several equal maxima is the one reported
+        for(int i=1;i<numProbs;i++){
+            if(probabilities[i] >= maxProb){
+                maxProb = probabilities[i];
+                maxIndex = i;
+            }
+        }
+        
+        // If every entry is negative infinity the shift below would evaluate
+        // (-inf) - (-inf) = NaN; the sum of the exponentials is zero, so return its log.
+        if (maxProb == -HUGE_VAL) { return maxProb; }
+        
+        double probSum = 0.0000;
+        
+        for(int i=0;i<numProbs;i++){
+            probSum += exp(probabilities[i] - maxProb);
+        }
+        
+        // undo the shift applied inside exp()
+        probSum = log(probSum) + maxProb;
+        
+        return probSum;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Classify", "getLogExpSum");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+