X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=classify.cpp;fp=classify.cpp;h=0000000000000000000000000000000000000000;hb=4a877efa127e56e81a21f53cfdbbfd3bfbe8c4ff;hp=2d01183479a504c798a18071df045f723643f850;hpb=a6cf29fa4dac0909c7582cb1094151d34093ee76;p=mothur.git diff --git a/classify.cpp b/classify.cpp deleted file mode 100644 index 2d01183..0000000 --- a/classify.cpp +++ /dev/null @@ -1,367 +0,0 @@ -/* - * classify.cpp - * Mothur - * - * Created by westcott on 11/3/09. - * Copyright 2009 Schloss Lab. All rights reserved. - * - */ - -#include "classify.h" -#include "sequence.hpp" -#include "kmerdb.hpp" -#include "suffixdb.hpp" -#include "blastdb.hpp" -#include "distancedb.hpp" -#include "referencedb.h" - -/**************************************************************************************************/ -void Classify::generateDatabaseAndNames(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch) { - try { - ReferenceDB* rdb = ReferenceDB::getInstance(); - - if (tfile == "saved") { tfile = rdb->getSavedTaxonomy(); } - - taxFile = tfile; - readTaxonomy(taxFile); - int numSeqs = 0; - - if (tempFile == "saved") { - int start = time(NULL); - m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory."); m->mothurOutEndLine(); - - numSeqs = rdb->referenceSeqs.size(); - templateFile = rdb->getSavedReference(); - tempFile = rdb->getSavedReference(); - - bool needToGenerate = true; - string kmerDBName; - if(method == "kmer") { - database = new KmerDB(tempFile, kmerSize); - - kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; - ifstream kmerFileTest(kmerDBName.c_str()); - if(kmerFileTest){ - bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion()); - if (GoodFile) { needToGenerate = false; } - } - } - else if(method == "suffix") { database = new SuffixDB(numSeqs); } - else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "", threadID); } - else if(method == "distance") { database = new DistanceDB(); } - else { - m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); - m->mothurOutEndLine(); - database = new KmerDB(tempFile, 8); - } - - if (needToGenerate) { - for (int k = 0; k < rdb->referenceSeqs.size(); k++) { - Sequence temp(rdb->referenceSeqs[k].getName(), rdb->referenceSeqs[k].getAligned()); - names.push_back(temp.getName()); - database->addSequence(temp); - } - database->generateDB(); - }else if ((method == "kmer") && (!needToGenerate)) { - ifstream kmerFileTest(kmerDBName.c_str()); - database->readKmerDB(kmerFileTest); - - for (int k = 0; k < rdb->referenceSeqs.size(); k++) { - names.push_back(rdb->referenceSeqs[k].getName()); - } - } - - database->setNumSeqs(numSeqs); - - //sanity check - bool okay = phyloTree->ErrorCheck(names); - - if (!okay) { m->control_pressed = true; } - - m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences and generate the search databases.");m->mothurOutEndLine(); - - }else { - - templateFile = tempFile; - - int start = time(NULL); - - m->mothurOut("Generating search database... "); cout.flush(); - #ifdef USE_MPI - int pid, processors; - vector positions; - int tag = 2001; - - MPI_Status status; - MPI_File inMPI; - MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - MPI_Comm_size(MPI_COMM_WORLD, &processors); - - //char* inFileName = new char[tempFile.length()]; - //memcpy(inFileName, tempFile.c_str(), tempFile.length()); - - char inFileName[1024]; - strcpy(inFileName, tempFile.c_str()); - - MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer - //delete inFileName; - - if (pid == 0) { //only one process needs to scan file - positions = m->setFilePosFasta(tempFile, numSeqs); //fills MPIPos, returns numSeqs - - //send file positions to all processes - for(int i = 1; i < processors; i++) { - MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD); - MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD); - } - }else{ - MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); - positions.resize(numSeqs+1); - MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); - } - - //create database - if(method == "kmer") { database = new KmerDB(tempFile, kmerSize); } - else if(method == "suffix") { database = new SuffixDB(numSeqs); } - else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "", pid); } - else if(method == "distance") { database = new DistanceDB(); } - else { - m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); m->mothurOutEndLine(); - database = new KmerDB(tempFile, 8); - } - - //read file - for(int i=0;i length) { tempBuf = tempBuf.substr(0, length); } - delete buf4; - istringstream iss (tempBuf,istringstream::in); - - Sequence temp(iss); - if (temp.getName() != "") { - if (rdb->save) { rdb->referenceSeqs.push_back(temp); } - names.push_back(temp.getName()); - database->addSequence(temp); - } - } - - database->generateDB(); - MPI_File_close(&inMPI); - MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case - #else - - //need to know number of template seqs for suffixdb - if (method == "suffix") { - ifstream inFASTA; - m->openInputFile(tempFile, inFASTA); - m->getNumSeqs(inFASTA, numSeqs); - inFASTA.close(); - } - - bool needToGenerate = true; - string kmerDBName; - if(method == "kmer") { - database = new KmerDB(tempFile, kmerSize); - - kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; - ifstream kmerFileTest(kmerDBName.c_str()); - if(kmerFileTest){ - bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion()); - if (GoodFile) { needToGenerate = false; } - } - } - else if(method == "suffix") { database = new SuffixDB(numSeqs); } - else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "", threadID); } - else if(method == "distance") { database = new DistanceDB(); } - else { - m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); - m->mothurOutEndLine(); - database = new KmerDB(tempFile, 8); - } - - if (needToGenerate) { - ifstream fastaFile; - m->openInputFile(tempFile, fastaFile); - - while (!fastaFile.eof()) { - Sequence temp(fastaFile); - m->gobble(fastaFile); - - if (rdb->save) { rdb->referenceSeqs.push_back(temp); } - - names.push_back(temp.getName()); - - database->addSequence(temp); - } - fastaFile.close(); - - database->generateDB(); - - }else if ((method == "kmer") && (!needToGenerate)) { - ifstream kmerFileTest(kmerDBName.c_str()); - database->readKmerDB(kmerFileTest); - - ifstream fastaFile; - m->openInputFile(tempFile, fastaFile); - - while (!fastaFile.eof()) { - Sequence temp(fastaFile); - m->gobble(fastaFile); - - if (rdb->save) { rdb->referenceSeqs.push_back(temp); } - names.push_back(temp.getName()); - } - fastaFile.close(); - } - #endif - - database->setNumSeqs(names.size()); - - //sanity check - bool okay = phyloTree->ErrorCheck(names); - - if (!okay) { m->control_pressed = true; } - - m->mothurOut("DONE."); m->mothurOutEndLine(); - m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine(); - } - - } - catch(exception& e) { - m->errorOut(e, "Classify", "generateDatabaseAndNames"); - exit(1); - } -} -/**************************************************************************************************/ -Classify::Classify() { m = MothurOut::getInstance(); database = NULL; flipped=false; } -/**************************************************************************************************/ - -int Classify::readTaxonomy(string file) { - try { - - phyloTree = new PhyloTree(); - string name, taxInfo; - - m->mothurOutEndLine(); - m->mothurOut("Reading in the " + file + " taxonomy...\t"); cout.flush(); - -#ifdef USE_MPI - int pid, num, processors; - vector positions; - int tag = 2001; - - MPI_Status status; - MPI_File inMPI; - MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - MPI_Comm_size(MPI_COMM_WORLD, &processors); - - //char* inFileName = new char[file.length()]; - //memcpy(inFileName, file.c_str(), file.length()); - - char inFileName[1024]; - strcpy(inFileName, file.c_str()); - - MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer - //delete inFileName; - - if (pid == 0) { - positions = m->setFilePosEachLine(file, num); - - //send file positions to all processes - for(int i = 1; i < processors; i++) { - MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD); - MPI_Send(&positions[0], (num+1), MPI_LONG, i, tag, MPI_COMM_WORLD); - } - }else{ - MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); - positions.resize(num+1); - MPI_Recv(&positions[0], (num+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); - } - - //read file - for(int i=0;i length) { tempBuf = tempBuf.substr(0, length); } - delete buf4; - - istringstream iss (tempBuf,istringstream::in); - iss >> name; m->gobble(iss); - iss >> taxInfo; - taxonomy[name] = taxInfo; - phyloTree->addSeqToTree(name, taxInfo); - } - - MPI_File_close(&inMPI); - MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case -#else - ifstream inTax; - m->openInputFile(file, inTax); - - //read template seqs and save - while (!inTax.eof()) { - inTax >> name; m->gobble(inTax); - inTax >> taxInfo; - - taxonomy[name] = taxInfo; - - phyloTree->addSeqToTree(name, taxInfo); - - m->gobble(inTax); - } - inTax.close(); -#endif - - phyloTree->assignHeirarchyIDs(0); - - phyloTree->setUp(file); - - m->mothurOut("DONE."); - m->mothurOutEndLine(); cout.flush(); - - return phyloTree->getNumSeqs(); - - } - catch(exception& e) { - m->errorOut(e, "Classify", "readTaxonomy"); - exit(1); - } -} -/**************************************************************************************************/ - -vector Classify::parseTax(string tax) { - try { - vector taxons; - - tax = tax.substr(0, tax.length()-1); //get rid of last ';' - - //parse taxonomy - string individual; - while (tax.find_first_of(';') != -1) { - individual = tax.substr(0,tax.find_first_of(';')); - tax = tax.substr(tax.find_first_of(';')+1, tax.length()); - taxons.push_back(individual); - - } - //get last one - taxons.push_back(tax); - - return taxons; - } - catch(exception& e) { - m->errorOut(e, "Classify", "parseTax"); - exit(1); - } -} -/**************************************************************************************************/ -