X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=aligncommand.cpp;h=bcc59675d3b545f68197a3f56911a769ebe249bb;hb=fefd5ee1517abd3bc38b469cb2dffc85a1571c7e;hp=a871244f4538881c8d285c2a1202597be319498d;hpb=7a18f1438113f6c52c66f97ae0044cfa365dd221;p=mothur.git diff --git a/aligncommand.cpp b/aligncommand.cpp index a871244..bcc5967 100644 --- a/aligncommand.cpp +++ b/aligncommand.cpp @@ -110,7 +110,7 @@ AlignCommand::AlignCommand(){ //********************************************************************************************************************** AlignCommand::AlignCommand(string option) { try { - abort = false; calledHelp = false; + abort = false; calledHelp = false; ReferenceDB* rdb = ReferenceDB::getInstance(); //allow user to run help @@ -543,6 +543,7 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam //moved this into driver to avoid deep copies in windows paralellized version Alignment* alignment; int longestBase = templateDB->getLongestBase(); + if (m->debug) { m->mothurOut("[DEBUG]: template longest base = " + toString(templateDB->getLongestBase()) + " \n"); } if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); } else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); } else if(align == "blast") { alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); } @@ -565,11 +566,12 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam int numBasesNeeded = origNumBases * threshold; if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file - if (candidateSeq->getUnaligned().length() > alignment->getnRows()) { - alignment->resize(candidateSeq->getUnaligned().length()+1); + if (candidateSeq->getUnaligned().length()+1 > alignment->getnRows()) { + if (m->debug) { m->mothurOut("[DEBUG]: " + candidateSeq->getName() + " " + toString(candidateSeq->getUnaligned().length()) + " " + toString(alignment->getnRows()) + " \n"); } + alignment->resize(candidateSeq->getUnaligned().length()+2); } Sequence temp = templateDB->findClosestSequence(candidateSeq); - Sequence* templateSeq = &temp; + Sequence* templateSeq = new Sequence(temp.getName(), temp.getAligned()); float searchScore = templateDB->getSearchScore(); @@ -593,19 +595,26 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam //get reverse compliment copy = new Sequence(candidateSeq->getName(), originalUnaligned); copy->reverseComplement(); + + if (m->debug) { m->mothurOut("[DEBUG]: flipping " + candidateSeq->getName() + " \n"); } //rerun alignment Sequence temp2 = templateDB->findClosestSequence(copy); - Sequence* templateSeq2 = &temp2; + Sequence* templateSeq2 = new Sequence(temp2.getName(), temp2.getAligned()); + + if (m->debug) { m->mothurOut("[DEBUG]: closest template " + temp2.getName() + " \n"); } searchScore = templateDB->getSearchScore(); nast2 = new Nast(alignment, copy, templateSeq2); + + if (m->debug) { m->mothurOut("[DEBUG]: completed Nast2 " + candidateSeq->getName() + " flipped numBases = " + toString(copy->getNumBases()) + " old numbases = " + toString(candidateSeq->getNumBases()) +" \n"); } //check if any better if (copy->getNumBases() > candidateSeq->getNumBases()) { candidateSeq->setAligned(copy->getAligned()); //use reverse compliments alignment since its better - templateSeq = templateSeq2; + delete templateSeq; + templateSeq = templateSeq2; delete nast; nast = nast2; needToDeleteCopy = true; @@ -613,8 +622,10 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam }else{ wasBetter = "\treverse complement did NOT produce a better alignment so it was not used, please check sequence."; delete nast2; + delete templateSeq2; delete copy; } + if (m->debug) { m->mothurOut("[DEBUG]: done.\n"); } } //create accnos file with names @@ -630,6 +641,7 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam report.print(); delete nast; + delete templateSeq; if (needToDeleteCopy) { delete copy; } count++; @@ -644,11 +656,11 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam #endif //report progress - if((count) % 100 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } + if((count) % 100 == 0){ m->mothurOutJustToScreen(toString(count) + "\n"); } } //report progress - if((count) % 100 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } + if((count) % 100 != 0){ m->mothurOutJustToScreen(toString(count) + "\n"); } delete alignment; alignmentFile.close(); @@ -734,7 +746,7 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align } Sequence temp = templateDB->findClosestSequence(candidateSeq); - Sequence* templateSeq = &temp; + Sequence* templateSeq = new Sequence(temp.getName(), temp.getAligned()); float searchScore = templateDB->getSearchScore(); @@ -759,7 +771,7 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align //rerun alignment Sequence temp2 = templateDB->findClosestSequence(copy); - Sequence* templateSeq2 = &temp2; + Sequence* templateSeq2 = new Sequence(temp2.getName(), temp2.getAligned()); searchScore = templateDB->getSearchScore(); @@ -768,7 +780,8 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align //check if any better if (copy->getNumBases() > candidateSeq->getNumBases()) { candidateSeq->setAligned(copy->getAligned()); //use reverse compliments alignment since its better - templateSeq = templateSeq2; + delete templateSeq; + templateSeq = templateSeq2; delete nast; nast = nast2; needToDeleteCopy = true; @@ -776,6 +789,7 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align }else{ wasBetter = "\treverse complement did NOT produce a better alignment, please check sequence."; delete nast2; + delete templateSeq2; delete copy; } } @@ -821,6 +835,7 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align delete buf3; delete nast; + delete templateSeq; if (needToDeleteCopy) { delete copy; } } delete candidateSeq; @@ -845,34 +860,72 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s try { int num = 0; processIDS.resize(0); + bool recalc = false; + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) int process = 1; //loop through and create all the processes you want while (process != processors) { - int pid = fork(); + pid_t pid = fork(); if (pid > 0) { processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp", accnosFName + toString(getpid()) + ".temp", filename); + num = driver(lines[process], alignFileName + toString(m->mothurGetpid(process)) + ".temp", reportFileName + toString(m->mothurGetpid(process)) + ".temp", accnosFName + m->mothurGetpid(process) + ".temp", filename); //pass numSeqs to parent ofstream out; - string tempFile = alignFileName + toString(getpid()) + ".num.temp"; + string tempFile = alignFileName + toString(m->mothurGetpid(process)) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; out.close(); exit(0); }else { - m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); - for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } - exit(0); + m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(process) + "\n"); processors = process; + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + recalc = true; + break; } } + if (recalc) { + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + vector positions; + positions = m->divideFile(filename, processors); + for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); } + + num = 0; + processIDS.resize(0); + process = 1; + + while (process != processors) { + pid_t pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + num = driver(lines[process], alignFileName + toString(m->mothurGetpid(process)) + ".temp", reportFileName + toString(m->mothurGetpid(process)) + ".temp", accnosFName + m->mothurGetpid(process) + ".temp", filename); + + //pass numSeqs to parent + ofstream out; + string tempFile = alignFileName + toString(m->mothurGetpid(process)) + ".num.temp"; + m->openOutputFile(tempFile, out); + out << num << endl; + out.close(); + + exit(0); + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } + } + } + //do my part num = driver(lines[0], alignFileName, reportFileName, accnosFName, filename); @@ -965,6 +1018,9 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s //Close all thread handles and free memory allocations. for(int i=0; i < pDataArray.size(); i++){ + if (pDataArray[i]->count != pDataArray[i]->end) { + m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; + } num += pDataArray[i]->count; CloseHandle(hThreadArray[i]); delete pDataArray[i];