git.donarmstrong.com Git - mothur.git/blobdiff - classify.cpp
added MPI to dist.seqs command
[mothur.git] / classify.cpp
index 605c1f8f3c6470310a649cebfcc090141eb7d70d..2db19735a8726e78269fbcce2a58521aa2205b19 100644 (file)
 #include "kmerdb.hpp"
 #include "suffixdb.hpp"
 #include "blastdb.hpp"
+#include "distancedb.hpp"
 
 /**************************************************************************************************/
-
-Classify::Classify(string tfile, string tempFile, string method, int kmerSize, int gapOpen, int gapExtend, int match, int misMatch) : taxFile(tfile), templateFile(tempFile) {         
-       try {                                                                                   
-               
-               readTaxonomy(taxFile);
+Classify::Classify(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch) : taxFile(tfile), templateFile(tempFile) {         
+       try {   
+               m = MothurOut::getInstance();                                                                   
+               readTaxonomy(taxFile);  
                
+               int start = time(NULL);
                int numSeqs = 0;
                //need to know number of template seqs for suffixdb
                if (method == "suffix") {
@@ -29,7 +30,7 @@ Classify::Classify(string tfile, string tempFile, string method, int kmerSize, i
                        inFASTA.close();
                }
 
-               mothurOut("Generating search database...    "); cout.flush();
+               m->mothurOut("Generating search database...    "); cout.flush();
                                
                bool needToGenerate = true;
                string kmerDBName;
@@ -42,21 +43,23 @@ Classify::Classify(string tfile, string tempFile, string method, int kmerSize, i
                }
                else if(method == "suffix")             {       database = new SuffixDB(numSeqs);                                                               }
                else if(method == "blast")              {       database = new BlastDB(gapOpen, gapExtend, match, misMatch);    }
+               else if(method == "distance")   {       database = new DistanceDB();    }
                else {
-                       mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
-                       mothurOutEndLine();
+                       m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
+                       m->mothurOutEndLine();
                        database = new KmerDB(tempFile, 8);
                }
                
                if (needToGenerate) {
                        ifstream fastaFile;
                        openInputFile(tempFile, fastaFile);
-               
+                       
                        while (!fastaFile.eof()) {
                                Sequence temp(fastaFile);
                                gobble(fastaFile);
                        
                                names.push_back(temp.getName());
+                                                               
                                database->addSequence(temp);    
                        }
                        fastaFile.close();
@@ -69,7 +72,7 @@ Classify::Classify(string tfile, string tempFile, string method, int kmerSize, i
                        
                        ifstream fastaFile;
                        openInputFile(tempFile, fastaFile);
-               
+                       
                        while (!fastaFile.eof()) {
                                Sequence temp(fastaFile);
                                gobble(fastaFile);
@@ -81,11 +84,12 @@ Classify::Classify(string tfile, string tempFile, string method, int kmerSize, i
                
                database->setNumSeqs(names.size());
                
-               mothurOut("DONE."); mothurOutEndLine();
+               m->mothurOut("DONE."); m->mothurOutEndLine();
+               m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine();
 
        }
        catch(exception& e) {
-               errorOut(e, "Classify", "Classify");
+               m->errorOut(e, "Classify", "Classify");
                exit(1);
        }
 }
@@ -93,12 +97,14 @@ Classify::Classify(string tfile, string tempFile, string method, int kmerSize, i
 
 void Classify::readTaxonomy(string file) {
        try {
-       
+               
+               phyloTree = new PhyloTree();
+               
                ifstream inTax;
                openInputFile(file, inTax);
        
-               mothurOutEndLine();
-               mothurOut("Reading in the " + file + " taxonomy...\t"); cout.flush();
+               m->mothurOutEndLine();
+               m->mothurOut("Reading in the " + file + " taxonomy...\t");      cout.flush();
                
                string name, taxInfo;
                //read template seqs and save
@@ -106,17 +112,47 @@ void Classify::readTaxonomy(string file) {
                        inTax >> name >> taxInfo;
                        
                        taxonomy[name] = taxInfo;
+                       
+                       phyloTree->addSeqToTree(name, taxInfo);
                
                        gobble(inTax);
                }
+               
+               phyloTree->assignHeirarchyIDs(0);
                inTax.close();
        
-               mothurOut("DONE.");
-               mothurOutEndLine();     cout.flush();
+               m->mothurOut("DONE.");
+               m->mothurOutEndLine();  cout.flush();
+       
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Classify", "readTaxonomy");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
+vector<string> Classify::parseTax(string tax) {
+       try {
+               vector<string> taxons;
+               
+               tax = tax.substr(0, tax.length()-1);  //get rid of last ';'
        
+               //parse taxonomy
+               string individual;
+               while (tax.find_first_of(';') != -1) {
+                       individual = tax.substr(0,tax.find_first_of(';'));
+                       tax = tax.substr(tax.find_first_of(';')+1, tax.length());
+                       taxons.push_back(individual);
+                       
+               }
+               //get last one
+               taxons.push_back(tax);
+               
+               return taxons;
        }
        catch(exception& e) {
-               errorOut(e, "Classify", "readTaxonomy");
+               m->errorOut(e, "Classify", "parseTax");
                exit(1);
        }
 }