+int Bayesian::getMostProbableTaxonomy(vector<int> queryKmer) {
+ try {
+ int indexofGenus = 0;
+
+ double maxProbability = -1000000.0;
+ //find taxonomy with highest probability that this sequence is from it
+
+
+// cout << genusNodes.size() << endl;
+
+
+ for (int k = 0; k < genusNodes.size(); k++) {
+ //for each taxonomy calc its probability
+
+ double prob = 0.0000;
+ for (int i = 0; i < queryKmer.size(); i++) {
+ prob += wordGenusProb[queryKmer[i]][k];
+ }
+
+// cout << phyloTree->get(genusNodes[k]).name << '\t' << prob << endl;
+
+ //is this the taxonomy with the greatest probability?
+ if (prob > maxProbability) {
+ indexofGenus = genusNodes[k];
+ maxProbability = prob;
+ }
+ }
+
+
+ return indexofGenus;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Bayesian", "getMostProbableTaxonomy");
+ exit(1);
+ }
+}
+//********************************************************************************************************************
//if it is more probable that the reverse complement kmers are in the template, then we assume the sequence is reversed.
+bool Bayesian::isReversed(vector<int>& queryKmers){
+ try{
+ bool reversed = false;
+ float prob = 0;
+ float reverseProb = 0;
+
+ for (int i = 0; i < queryKmers.size(); i++){
+ int kmer = queryKmers[i];
+ if (kmer >= 0){
+ prob += WordPairDiffArr[kmer].prob;
+ reverseProb += WordPairDiffArr[kmer].reverseProb;
+ }
+ }
+
+ if (reverseProb > prob){ reversed = true; }
+
+ return reversed;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Bayesian", "isReversed");
+ exit(1);
+ }
+}
+//********************************************************************************************************************
+int Bayesian::generateWordPairDiffArr(){
+ try{
+ Kmer kmer(kmerSize);
+ for (int i = 0; i < WordPairDiffArr.size(); i++) {
+ int reversedWord = kmer.getReverseKmerNumber(i);
+ WordPairDiffArr[i].reverseProb = WordPairDiffArr[reversedWord].prob;
+ }
+
+ return 0;
+ }catch(exception& e) {
+ m->errorOut(e, "Bayesian", "generateWordPairDiffArr");
+ exit(1);
+ }
+}
+/*************************************************************************************************
+map<string, int> Bayesian::parseTaxMap(string newTax) {
+ try{
+
+ map<string, int> parsed;
+
+ newTax = newTax.substr(0, newTax.length()-1); //get rid of last ';'
+
+ //parse taxonomy
+ string individual;
+ while (newTax.find_first_of(';') != -1) {
+ individual = newTax.substr(0,newTax.find_first_of(';'));
+ newTax = newTax.substr(newTax.find_first_of(';')+1, newTax.length());
+ parsed[individual] = 1;
+ }
+
+ //get last one
+ parsed[newTax] = 1;
+
+ return parsed;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Bayesian", "parseTax");
+ exit(1);
+ }
+}
+**************************************************************************************************/
+void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string inNumName) {
+ try{
+
+ #ifdef USE_MPI
+
+ int pid, num, num2, processors;
+ vector<unsigned long long> positions;
+ vector<unsigned long long> positions2;
+
+ MPI_Status status;
+ MPI_File inMPI;
+ MPI_File inMPI2;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
+ int tag = 2001;
+
+ char inFileName[1024];
+ strcpy(inFileName, inNumName.c_str());
+
+ char inFileName2[1024];
+ strcpy(inFileName2, inName.c_str());
+
+ MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
+ MPI_File_open(MPI_COMM_WORLD, inFileName2, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI2); //comm, filename, mode, info, filepointer
+
+ if (pid == 0) {
+ positions = m->setFilePosEachLine(inNumName, num);
+ positions2 = m->setFilePosEachLine(inName, num2);
+
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&positions[0], (num+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+
+ MPI_Send(&num2, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&positions2[0], (num2+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
+
+ }else{
+ MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+ positions.resize(num+1);
+ MPI_Recv(&positions[0], (num+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+
+ MPI_Recv(&num2, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+ positions2.resize(num2+1);
+ MPI_Recv(&positions2[0], (num2+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+ }
+
+ //read version
+ int length = positions2[1] - positions2[0];
+ char* buf5 = new char[length];
+
+ MPI_File_read_at(inMPI2, positions2[0], buf5, length, MPI_CHAR, &status);
+ delete buf5;
+
+ //read numKmers
+ length = positions2[2] - positions2[1];
+ char* buf = new char[length];
+
+ MPI_File_read_at(inMPI2, positions2[1], buf, length, MPI_CHAR, &status);
+
+ string tempBuf = buf;
+ if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
+ delete buf;
+
+ istringstream iss (tempBuf,istringstream::in);
+ iss >> numKmers;
+
+ //initialze probabilities
+ wordGenusProb.resize(numKmers);
+
+ for (int j = 0; j < wordGenusProb.size(); j++) { wordGenusProb[j].resize(genusNodes.size()); }
+
+ int kmer, name;
+ vector<int> numbers; numbers.resize(numKmers);
+ float prob;
+ vector<float> zeroCountProb; zeroCountProb.resize(numKmers);
+ WordPairDiffArr.resize(numKmers);
+
+ //read version
+ length = positions[1] - positions[0];
+ char* buf6 = new char[length];
+
+ MPI_File_read_at(inMPI2, positions[0], buf6, length, MPI_CHAR, &status);
+ delete buf6;
+
+ //read file
+ for(int i=1;i<num;i++){
+ //read next sequence
+ length = positions[i+1] - positions[i];
+ char* buf4 = new char[length];
+
+ MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
+
+ tempBuf = buf4;
+ if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
+ delete buf4;
+
+ istringstream iss (tempBuf,istringstream::in);
+ float probTemp;
+ iss >> zeroCountProb[i] >> numbers[i] >> probTemp;
+ WordPairDiffArr[i].prob = probTemp;
+
+ }
+
+ MPI_File_close(&inMPI);
+
+ for(int i=2;i<num2;i++){
+ //read next sequence
+ length = positions2[i+1] - positions2[i];
+ char* buf4 = new char[length];
+
+ MPI_File_read_at(inMPI2, positions2[i], buf4, length, MPI_CHAR, &status);
+
+ tempBuf = buf4;
+ if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
+ delete buf4;
+
+ istringstream iss (tempBuf,istringstream::in);
+
+ iss >> kmer;
+
+ //set them all to zero value
+ for (int i = 0; i < genusNodes.size(); i++) {
+ wordGenusProb[kmer][i] = log(zeroCountProb[kmer] / (float) (genusTotals[i]+1));
+ }
+
+ //get probs for nonzero values
+ for (int i = 0; i < numbers[kmer]; i++) {
+ iss >> name >> prob;
+ wordGenusProb[kmer][name] = prob;
+ }
+
+ }
+ MPI_File_close(&inMPI2);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+ #else
+ //read version
+ string line = m->getline(in); m->gobble(in);
+
+ in >> numKmers; m->gobble(in);
+ //cout << threadID << '\t' << line << '\t' << numKmers << &in << '\t' << &inNum << '\t' << genusNodes.size() << endl;
+ //initialze probabilities
+ wordGenusProb.resize(numKmers);
+
+ for (int j = 0; j < wordGenusProb.size(); j++) { wordGenusProb[j].resize(genusNodes.size()); }
+
+ int kmer, name, count; count = 0;
+ vector<int> num; num.resize(numKmers);
+ float prob;
+ vector<float> zeroCountProb; zeroCountProb.resize(numKmers);
+ WordPairDiffArr.resize(numKmers);
+
+ //read version
+ string line2 = m->getline(inNum); m->gobble(inNum);
+ float probTemp;
+ //cout << threadID << '\t' << line2 << '\t' << this << endl;
+ while (inNum) {
+ inNum >> zeroCountProb[count] >> num[count] >> probTemp;
+ WordPairDiffArr[count].prob = probTemp;
+ count++;
+ m->gobble(inNum);
+ //cout << threadID << '\t' << count << endl;
+ }
+ inNum.close();
+ //cout << threadID << '\t' << "here1 " << &wordGenusProb << '\t' << &num << endl; //
+ //cout << threadID << '\t' << &genusTotals << '\t' << endl;
+ //cout << threadID << '\t' << genusNodes.size() << endl;
+ while(in) {
+ in >> kmer;
+ //cout << threadID << '\t' << kmer << endl;
+ //set them all to zero value
+ for (int i = 0; i < genusNodes.size(); i++) {
+ wordGenusProb[kmer][i] = log(zeroCountProb[kmer] / (float) (genusTotals[i]+1));
+ }
+ //cout << threadID << '\t' << num[kmer] << "here" << endl;
+ //get probs for nonzero values
+ for (int i = 0; i < num[kmer]; i++) {
+ in >> name >> prob;
+ wordGenusProb[kmer][name] = prob;
+ }
+
+ m->gobble(in);
+ }
+ in.close();
+ //cout << threadID << '\t' << "here" << endl;
+ #endif
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Bayesian", "readProbFile");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+bool Bayesian::checkReleaseDate(ifstream& file1, ifstream& file2, ifstream& file3, ifstream& file4) {
+ try {
+
+ bool good = true;
+
+ vector<string> lines;
+ lines.push_back(m->getline(file1));
+ lines.push_back(m->getline(file2));
+ lines.push_back(m->getline(file3));
+ lines.push_back(m->getline(file4));
+
+ //before we added this check
+ if ((lines[0][0] != '#') || (lines[1][0] != '#') || (lines[2][0] != '#') || (lines[3][0] != '#')) { good = false; }
+ else {
+ //rip off #
+ for (int i = 0; i < lines.size(); i++) { lines[i] = lines[i].substr(1); }
+
+ //get mothurs current version
+ string version = m->getVersion();
+
+ vector<string> versionVector;
+ m->splitAtChar(version, versionVector, '.');
+
+ //check each files version
+ for (int i = 0; i < lines.size(); i++) {
+ vector<string> linesVector;
+ m->splitAtChar(lines[i], linesVector, '.');
+
+ if (versionVector.size() != linesVector.size()) { good = false; break; }
+ else {
+ for (int j = 0; j < versionVector.size(); j++) {
+ int num1, num2;
+ convert(versionVector[j], num1);
+ convert(linesVector[j], num2);
+
+ //if mothurs version is newer than this files version, then we want to remake it
+ if (num1 > num2) { good = false; break; }
+ }
+ }
+
+ if (!good) { break; }
+ }
+ }
+
+ if (!good) { file1.close(); file2.close(); file3.close(); file4.close(); }
+ else { file1.seekg(0); file2.seekg(0); file3.seekg(0); file4.seekg(0); }
+
+ return good;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Bayesian", "checkReleaseDate");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+
+
+
+
+