git.donarmstrong.com Git - mothur.git/blobdiff - classify.cpp
added template=self capability to chimera.slayer, worked on corr.axes and added accno...
[mothur.git] / classify.cpp
index e07344d4678a0ca263008e4b10d4d125ad8ef122..875628d85d8dbcb4caf6d712b41be517eefef733 100644 (file)
@@ -28,7 +28,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                m->mothurOut("Generating search database...    "); cout.flush();
 #ifdef USE_MPI 
                        int pid, processors;
-                       vector<long> positions;
+                       vector<unsigned long int> positions;
                        int tag = 2001;
                
                        MPI_Status status; 
@@ -46,7 +46,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                        //delete inFileName;
 
                        if (pid == 0) { //only one process needs to scan file
-                               positions = setFilePosFasta(tempFile, numSeqs); //fills MPIPos, returns numSeqs
+                               positions = m->setFilePosFasta(tempFile, numSeqs); //fills MPIPos, returns numSeqs
 
                                //send file positions to all processes
                                for(int i = 1; i < processors; i++) { 
@@ -96,8 +96,8 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                //need to know number of template seqs for suffixdb
                if (method == "suffix") {
                        ifstream inFASTA;
-                       openInputFile(tempFile, inFASTA);
-                       numSeqs = count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                       m->openInputFile(tempFile, inFASTA);
+                       m->getNumSeqs(inFASTA, numSeqs);
                        inFASTA.close();
                }
 
@@ -108,7 +108,10 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                        
                        kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
                        ifstream kmerFileTest(kmerDBName.c_str());
-                       if(kmerFileTest){       needToGenerate = false;         }
+                       if(kmerFileTest){       
+                               bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion());
+                               if (GoodFile) {  needToGenerate = false;        }
+                       }
                }
                else if(method == "suffix")             {       database = new SuffixDB(numSeqs);                                                               }
                else if(method == "blast")              {       database = new BlastDB(gapOpen, gapExtend, match, misMatch);    }
@@ -121,14 +124,14 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                
                if (needToGenerate) {
                        ifstream fastaFile;
-                       openInputFile(tempFile, fastaFile);
+                       m->openInputFile(tempFile, fastaFile);
                        
                        while (!fastaFile.eof()) {
                                Sequence temp(fastaFile);
-                               gobble(fastaFile);
+                               m->gobble(fastaFile);
                        
                                names.push_back(temp.getName());
-                                                               
+                                                       
                                database->addSequence(temp);    
                        }
                        fastaFile.close();
@@ -138,21 +141,27 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                }else if ((method == "kmer") && (!needToGenerate)) {    
                        ifstream kmerFileTest(kmerDBName.c_str());
                        database->readKmerDB(kmerFileTest);     
-                       
+               
                        ifstream fastaFile;
-                       openInputFile(tempFile, fastaFile);
+                       m->openInputFile(tempFile, fastaFile);
                        
                        while (!fastaFile.eof()) {
                                Sequence temp(fastaFile);
-                               gobble(fastaFile);
+                               m->gobble(fastaFile);
 
                                names.push_back(temp.getName());
                        }
                        fastaFile.close();
                }
-#endif         
+#endif 
+       
                database->setNumSeqs(names.size());
                
+               //sanity check
+               bool okay = phyloTree->ErrorCheck(names);
+               
+               if (!okay) { m->control_pressed = true; }
+               
                m->mothurOut("DONE."); m->mothurOutEndLine();
                m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine();
 
@@ -177,7 +186,7 @@ int Classify::readTaxonomy(string file) {
 
 #ifdef USE_MPI 
                int pid, num, processors;
-               vector<long> positions;
+               vector<unsigned long int> positions;
                int tag = 2001;
                
                MPI_Status status; 
@@ -195,7 +204,7 @@ int Classify::readTaxonomy(string file) {
                //delete inFileName;
 
                if (pid == 0) {
-                       positions = setFilePosEachLine(file, num);
+                       positions = m->setFilePosEachLine(file, num);
                        
                        //send file positions to all processes
                        for(int i = 1; i < processors; i++) { 
@@ -230,7 +239,7 @@ int Classify::readTaxonomy(string file) {
                MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
 #else                          
                ifstream inTax;
-               openInputFile(file, inTax);
+               m->openInputFile(file, inTax);
        
                //read template seqs and save
                while (!inTax.eof()) {
@@ -240,7 +249,7 @@ int Classify::readTaxonomy(string file) {
                        
                        phyloTree->addSeqToTree(name, taxInfo);
                
-                       gobble(inTax);
+                       m->gobble(inTax);
                }
                inTax.close();
 #endif 
@@ -248,7 +257,7 @@ int Classify::readTaxonomy(string file) {
                phyloTree->assignHeirarchyIDs(0);
                
                phyloTree->setUp(file);
-               
+       
                m->mothurOut("DONE.");
                m->mothurOutEndLine();  cout.flush();