X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=alignmentdb.cpp;h=5a3027229bb8e91bdacfa25dea566a5273ca9d9d;hb=af27acb766f6947c45e1eb65438d878c7ea48ef3;hp=59405efe05963c8d153828d7f8f56976c440f197;hpb=74844a60d80c6dd06e3fb02ee9b928424f9019b0;p=mothur.git diff --git a/alignmentdb.cpp b/alignmentdb.cpp index 59405ef..5a30272 100644 --- a/alignmentdb.cpp +++ b/alignmentdb.cpp @@ -14,30 +14,93 @@ /**************************************************************************************************/ -AlignmentDB::AlignmentDB(string fastaFileName, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){ // This assumes that the template database is in fasta format, may +AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){ // This assumes that the template database is in fasta format, may try { // need to alter this in the future? m = MothurOut::getInstance(); longest = 0; - - ifstream fastaFile; - openInputFile(fastaFileName, fastaFile); + method = s; + bool needToGenerate = true; m->mothurOutEndLine(); m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t"); cout.flush(); + #ifdef USE_MPI + int pid, processors; + vector positions; + + MPI_Status status; + MPI_File inMPI; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + MPI_Comm_size(MPI_COMM_WORLD, &processors); + int tag = 2001; + + char inFileName[1024]; + strcpy(inFileName, fastaFileName.c_str()); + + MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer + + if (pid == 0) { + positions = m->setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs + + //send file positions to all processes + for(int i = 1; i < processors; i++) { + MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD); + MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD); + } + }else{ + MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); + positions.resize(numSeqs+1); + MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); + } + + //read file + for(int i=0;icontrol_pressed) { templateSequences.clear(); break; } + + //read next sequence + int length = positions[i+1] - positions[i]; + char* buf4 = new char[length]; + + MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status); + + string tempBuf = buf4; + if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); } + delete buf4; + + istringstream iss (tempBuf,istringstream::in); + + Sequence temp(iss); + if (temp.getName() != "") { + templateSequences.push_back(temp); + //save longest base + if (temp.getUnaligned().length() >= longest) { longest = temp.getUnaligned().length()+1; } + } + } + + MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case + + MPI_File_close(&inMPI); + + #else + ifstream fastaFile; + m->openInputFile(fastaFileName, fastaFile); + while (!fastaFile.eof()) { - Sequence temp(fastaFile); gobble(fastaFile); + Sequence temp(fastaFile); m->gobble(fastaFile); + + if (m->control_pressed) { templateSequences.clear(); break; } if (temp.getName() != "") { templateSequences.push_back(temp); //save longest base - if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; } + if (temp.getUnaligned().length() >= longest) { longest = (temp.getUnaligned().length()+1); } } } - - numSeqs = templateSequences.size(); - fastaFile.close(); + #endif + + numSeqs = templateSequences.size(); //all of this is elsewhere already! m->mothurOut("DONE."); @@ -49,15 +112,21 @@ AlignmentDB::AlignmentDB(string fastaFileName, string method, int kmerSize, floa emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); - bool needToGenerate = true; + string kmerDBName; if(method == "kmer") { search = new KmerDB(fastaFileName, kmerSize); - kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; - ifstream kmerFileTest(kmerDBName.c_str()); - - if(kmerFileTest){ needToGenerate = false; } + #ifdef USE_MPI + #else + kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; + ifstream kmerFileTest(kmerDBName.c_str()); + + if(kmerFileTest){ + bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion()); + if (GoodFile) { needToGenerate = false; } + } + #endif } else if(method == "suffix") { search = new SuffixDB(numSeqs); } else if(method == "blast") { search = new BlastDB(gapOpen, gapExtend, match, misMatch); } @@ -67,20 +136,49 @@ AlignmentDB::AlignmentDB(string fastaFileName, string method, int kmerSize, floa search = new KmerDB(fastaFileName, 8); } - if (needToGenerate) { - - //add sequences to search - for (int i = 0; i < templateSequences.size(); i++) { - search->addSequence(templateSequences[i]); + if (!(m->control_pressed)) { + if (needToGenerate) { + //add sequences to search + for (int i = 0; i < templateSequences.size(); i++) { + search->addSequence(templateSequences[i]); + + if (m->control_pressed) { templateSequences.clear(); break; } + } + + if (m->control_pressed) { templateSequences.clear(); } + + search->generateDB(); + + }else if ((method == "kmer") && (!needToGenerate)) { + ifstream kmerFileTest(kmerDBName.c_str()); + search->readKmerDB(kmerFileTest); } - search->generateDB(); - - }else if ((method == "kmer") && (!needToGenerate)) { - ifstream kmerFileTest(kmerDBName.c_str()); - search->readKmerDB(kmerFileTest); + + search->setNumSeqs(numSeqs); } + } + catch(exception& e) { + m->errorOut(e, "AlignmentDB", "AlignmentDB"); + exit(1); + } +} +/**************************************************************************************************/ +AlignmentDB::AlignmentDB(string s){ + try { + m = MothurOut::getInstance(); + method = s; + + if(method == "suffix") { search = new SuffixDB(); } + else if(method == "blast") { search = new BlastDB(); } + else { search = new KmerDB(); } + + + //in case you delete the seqs and then ask for them + emptySequence = Sequence(); + emptySequence.setName("no_match"); + emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); + emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); - search->setNumSeqs(numSeqs); } catch(exception& e) { m->errorOut(e, "AlignmentDB", "AlignmentDB"); @@ -109,3 +207,5 @@ Sequence AlignmentDB::findClosestSequence(Sequence* seq) { + +