if (tfile == "saved") { tfile = rdb->getSavedTaxonomy(); }
taxFile = tfile;
- readTaxonomy(taxFile);
+
int numSeqs = 0;
if (tempFile == "saved") {
}
}
else if(method == "suffix") { database = new SuffixDB(numSeqs); }
- else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, ""); }
+ else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "", threadID); }
else if(method == "distance") { database = new DistanceDB(); }
else {
m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
names.push_back(temp.getName());
database->addSequence(temp);
}
- database->generateDB();
+ if ((method == "kmer") && (!shortcuts)) {;} //don't print
+ else {database->generateDB(); }
}else if ((method == "kmer") && (!needToGenerate)) {
ifstream kmerFileTest(kmerDBName.c_str());
database->readKmerDB(kmerFileTest);
database->setNumSeqs(numSeqs);
- //sanity check
- bool okay = phyloTree->ErrorCheck(names);
-
- if (!okay) { m->control_pressed = true; }
-
m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences and generate the search databases.");m->mothurOutEndLine();
}else {
m->mothurOut("Generating search database... "); cout.flush();
#ifdef USE_MPI
int pid, processors;
- vector<unsigned long int> positions;
+ vector<unsigned long long> positions;
int tag = 2001;
MPI_Status status;
//create database
if(method == "kmer") { database = new KmerDB(tempFile, kmerSize); }
else if(method == "suffix") { database = new SuffixDB(numSeqs); }
- else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch); }
+ else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "", pid); }
else if(method == "distance") { database = new DistanceDB(); }
else {
m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); m->mothurOutEndLine();
}
}
else if(method == "suffix") { database = new SuffixDB(numSeqs); }
- else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, ""); }
+ else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "", threadID); }
else if(method == "distance") { database = new DistanceDB(); }
else {
m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
}
fastaFile.close();
- database->generateDB();
+ if ((method == "kmer") && (!shortcuts)) {;} //don't print
+ else {database->generateDB(); }
}else if ((method == "kmer") && (!needToGenerate)) {
ifstream kmerFileTest(kmerDBName.c_str());
fastaFile.close();
}
#endif
-
+
database->setNumSeqs(names.size());
- //sanity check
- bool okay = phyloTree->ErrorCheck(names);
-
- if (!okay) { m->control_pressed = true; }
-
m->mothurOut("DONE."); m->mothurOutEndLine();
m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine();
}
-
+
+ readTaxonomy(taxFile);
+
+ //sanity check
+ bool okay = phyloTree->ErrorCheck(names);
+
+ if (!okay) { m->control_pressed = true; }
}
catch(exception& e) {
m->errorOut(e, "Classify", "generateDatabaseAndNames");
}
}
/**************************************************************************************************/
-Classify::Classify() { m = MothurOut::getInstance(); database = NULL; }
+Classify::Classify() { m = MothurOut::getInstance(); database = NULL; phyloTree=NULL; flipped=false; }
/**************************************************************************************************/
int Classify::readTaxonomy(string file) {
m->mothurOutEndLine();
m->mothurOut("Reading in the " + file + " taxonomy...\t"); cout.flush();
-
+ if (m->debug) { m->mothurOut("[DEBUG]: Taxonomies read in...\n"); }
+
#ifdef USE_MPI
int pid, num, processors;
- vector<unsigned long int> positions;
+ vector<unsigned long long> positions;
int tag = 2001;
MPI_Status status;
MPI_File inMPI;
MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
MPI_Comm_size(MPI_COMM_WORLD, &processors);
-
- //char* inFileName = new char[file.length()];
- //memcpy(inFileName, file.c_str(), file.length());
char inFileName[1024];
strcpy(inFileName, file.c_str());
delete buf4;
istringstream iss (tempBuf,istringstream::in);
- iss >> name >> taxInfo;
- taxonomy[name] = taxInfo;
- phyloTree->addSeqToTree(name, taxInfo);
- }
+ iss >> name; m->gobble(iss);
+ iss >> taxInfo;
+ if (m->debug) { m->mothurOut("[DEBUG]: name = " + name + " tax = " + taxInfo + "\n"); }
+ if (m->inUsersGroups(name, names)) {
+ taxonomy[name] = taxInfo;
+ phyloTree->addSeqToTree(name, taxInfo);
+ }else {
+ m->mothurOut("[WARNING]: " + name + " is in your taxonomy file and not in your reference file, ignoring.\n");
+ }
+ }
MPI_File_close(&inMPI);
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
-#else
- ifstream inTax;
- m->openInputFile(file, inTax);
-
- //read template seqs and save
- while (!inTax.eof()) {
- inTax >> name >> taxInfo;
-
- taxonomy[name] = taxInfo;
-
- phyloTree->addSeqToTree(name, taxInfo);
-
- m->gobble(inTax);
- }
- inTax.close();
+#else
+
+ taxonomy.clear();
+ m->readTax(file, taxonomy);
+
+ //commented out to save time with large templates. 6/12/13
+ //map<string, string> tempTaxonomy;
+ for (map<string, string>::iterator itTax = taxonomy.begin(); itTax != taxonomy.end(); itTax++) {
+ //if (m->inUsersGroups(itTax->first, names)) {
+ phyloTree->addSeqToTree(itTax->first, itTax->second);
+ if (m->control_pressed) { break; }
+ //tempTaxonomy[itTax->first] = itTax->second;
+ // }else {
+ // m->mothurOut("[WARNING]: " + itTax->first + " is in your taxonomy file and not in your reference file, ignoring.\n");
+ //}
+ }
+ //taxonomy = tempTaxonomy;
#endif
-
phyloTree->assignHeirarchyIDs(0);
phyloTree->setUp(file);
vector<string> Classify::parseTax(string tax) {
try {
vector<string> taxons;
-
- tax = tax.substr(0, tax.length()-1); //get rid of last ';'
-
- //parse taxonomy
- string individual;
- while (tax.find_first_of(';') != -1) {
- individual = tax.substr(0,tax.find_first_of(';'));
- tax = tax.substr(tax.find_first_of(';')+1, tax.length());
- taxons.push_back(individual);
-
- }
- //get last one
- taxons.push_back(tax);
-
- return taxons;
+ m->splitAtChar(tax, taxons, ';');
+ return taxons;
}
catch(exception& e) {
m->errorOut(e, "Classify", "parseTax");
}
/**************************************************************************************************/
+double Classify::getLogExpSum(vector<double> probabilities, int& maxIndex){
+ try {
+ // http://jblevins.org/notes/log-sum-exp
+
+ double maxProb = probabilities[0];
+ maxIndex = 0;
+
+ int numProbs = (int)probabilities.size();
+
+ for(int i=1;i<numProbs;i++){
+ if(probabilities[i] >= maxProb){
+ maxProb = probabilities[i];
+ maxIndex = i;
+ }
+ }
+
+ double probSum = 0.0000;
+
+ for(int i=0;i<numProbs;i++){
+ probSum += exp(probabilities[i] - maxProb);
+ }
+
+ probSum = log(probSum) + maxProb;
+
+ return probSum;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Classify", "getLogExpSum");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+