X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=aligncommand.cpp;h=613f9b7d6f13b04dec679a162ba71faf25326c65;hb=d84bb41d7dadcfa2c67ce5edb9b94060e5659fa7;hp=16006f72e602e37270d3f2f8396245f921bd064f;hpb=52ea8f3a52c05331b9bd7519ae0b518bda233d05;p=mothur.git diff --git a/aligncommand.cpp b/aligncommand.cpp index 16006f7..613f9b7 100644 --- a/aligncommand.cpp +++ b/aligncommand.cpp @@ -45,7 +45,7 @@ AlignCommand::AlignCommand(string option) { OptionParser parser(option); map parameters = parser.getParameters(); - ValidParameters validParameter; + ValidParameters validParameter("align.seqs"); map::iterator it; //check to make sure all parameters are valid for command @@ -98,31 +98,22 @@ AlignCommand::AlignCommand(string option) { int ableToOpen; ifstream in; - - #ifdef USE_MPI - int pid; - MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running - MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - - if (pid == 0) { - #endif - ableToOpen = openInputFile(candidateFileNames[i], in); - in.close(); - - #ifdef USE_MPI - for (int j = 1; j < processors; j++) { - MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); - } - }else{ - MPI_Status status; - MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); + ableToOpen = openInputFile(candidateFileNames[i], in, "noerror"); + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + getSimpleName(candidateFileNames[i]); + m->mothurOut("Unable to open " + candidateFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); + ableToOpen = openInputFile(tryPath, in, "noerror"); + candidateFileNames[i] = tryPath; } - - #endif + } + in.close(); if (ableToOpen == 1) { - m->mothurOut(candidateFileNames[i] + " will be disregarded."); m->mothurOutEndLine(); + m->mothurOut("Unable to open " + candidateFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); //erase from file list candidateFileNames.erase(candidateFileNames.begin()+i); i--; @@ -252,9 +243,9 @@ int AlignCommand::execute(){ #ifdef USE_MPI int pid, end, numSeqsPerProcessor; int tag = 2001; - vector MPIPos; + vector MPIPos; MPIWroteAccnos = false; - + MPI_Status status; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are MPI_Comm_size(MPI_COMM_WORLD, &processors); @@ -301,7 +292,6 @@ int AlignCommand::execute(){ int startIndex = pid * numSeqsPerProcessor; if(pid == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor; } - //align your part driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos); @@ -356,24 +346,16 @@ int AlignCommand::execute(){ } #else - + vector positions = divideFile(candidateFileNames[s], processors); + + for (int i = 0; i < (positions.size()-1); i++) { + lines.push_back(new linePair(positions[i], positions[(i+1)])); + } #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) if(processors == 1){ - ifstream inFASTA; - openInputFile(candidateFileNames[s], inFASTA); - getNumSeqs(inFASTA, numFastaSeqs); - inFASTA.close(); - - lines.push_back(new linePair(0, numFastaSeqs)); + numFastaSeqs = driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); - driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); - - if (m->control_pressed) { - remove(accnosFileName.c_str()); - remove(alignFileName.c_str()); - remove(reportFileName.c_str()); - return 0; - } + if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); return 0; } //delete accnos file if its blank else report to user if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } @@ -384,36 +366,10 @@ int AlignCommand::execute(){ }else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); } m->mothurOutEndLine(); } - } - else{ - vector positions; + }else{ processIDS.resize(0); - ifstream inFASTA; - openInputFile(candidateFileNames[s], inFASTA); - - string input; - while(!inFASTA.eof()){ - input = getline(inFASTA); - if (input.length() != 0) { - if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } - } - } - inFASTA.close(); - - numFastaSeqs = positions.size(); - - int numSeqsPerProcessor = numFastaSeqs / processors; - - for (int i = 0; i < processors; i++) { - long int startPos = positions[ i * numSeqsPerProcessor ]; - if(i == processors - 1){ - numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; - } - lines.push_back(new linePair(startPos, numSeqsPerProcessor)); - } - - createProcesses(alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); + numFastaSeqs = createProcesses(alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); rename((alignFileName + toString(processIDS[0]) + ".temp").c_str(), alignFileName.c_str()); rename((reportFileName + toString(processIDS[0]) + ".temp").c_str(), reportFileName.c_str()); @@ -450,29 +406,12 @@ int AlignCommand::execute(){ m->mothurOutEndLine(); }else{ hasAccnos = false; } - if (m->control_pressed) { - remove(accnosFileName.c_str()); - remove(alignFileName.c_str()); - remove(reportFileName.c_str()); - return 0; - } + if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); return 0; } } #else - ifstream inFASTA; - openInputFile(candidateFileNames[s], inFASTA); - getNumSeqs(inFASTA, numFastaSeqs); - inFASTA.close(); - - lines.push_back(new linePair(0, numFastaSeqs)); + numFastaSeqs = driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); - driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); - - if (m->control_pressed) { - remove(accnosFileName.c_str()); - remove(alignFileName.c_str()); - remove(reportFileName.c_str()); - return 0; - } + if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); return 0; } //delete accnos file if its blank else report to user if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } @@ -524,7 +463,7 @@ int AlignCommand::execute(){ //********************************************************************************************************************** -int AlignCommand::driver(linePair* line, string alignFName, string reportFName, string accnosFName, string filename){ +int AlignCommand::driver(linePair* filePos, string alignFName, string reportFName, string accnosFName, string filename){ try { ofstream alignmentFile; openOutputFile(alignFName, alignmentFile); @@ -537,9 +476,12 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName, ifstream inFASTA; openInputFile(filename, inFASTA); - inFASTA.seekg(line->start); + inFASTA.seekg(filePos->start); + + bool done = false; + int count = 0; - for(int i=0;inumSeqs;i++){ + while (!done) { if (m->control_pressed) { return 0; } @@ -571,7 +513,7 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName, //if there is a possibility that this sequence should be reversed if (candidateSeq->getNumBases() < numBasesNeeded) { - string wasBetter = ""; + string wasBetter = ""; //if the user wants you to try the reverse if (flip) { //get reverse compliment @@ -593,8 +535,9 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName, delete nast; nast = nast2; needToDeleteCopy = true; + wasBetter = "\treverse complement produced a better alignment, so mothur used the reverse complement."; }else{ - wasBetter = "\treverse complement did NOT produce a better alignment, please check sequence."; + wasBetter = "\treverse complement did NOT produce a better alignment so it was not used, please check sequence."; delete nast2; delete copy; } @@ -615,20 +558,26 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName, report.print(); delete nast; if (needToDeleteCopy) { delete copy; } + + count++; } delete candidateSeq; + unsigned long int pos = inFASTA.tellg(); + if ((pos == -1) || (pos >= filePos->end)) { break; } + //report progress - if((i+1) % 100 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine(); } + if((count) % 100 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } + } //report progress - if((line->numSeqs) % 100 != 0){ m->mothurOut(toString(line->numSeqs)); m->mothurOutEndLine(); } + if((count) % 100 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } alignmentFile.close(); inFASTA.close(); accnosFile.close(); - return 1; + return count; } catch(exception& e) { m->errorOut(e, "AlignCommand", "driver"); @@ -637,7 +586,7 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName, } //********************************************************************************************************************** #ifdef USE_MPI -int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& alignFile, MPI_File& reportFile, MPI_File& accnosFile, vector& MPIPos){ +int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& alignFile, MPI_File& reportFile, MPI_File& accnosFile, vector& MPIPos){ try { string outputString = ""; MPI_Status statusReport; @@ -669,7 +618,7 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align int length = MPIPos[start+i+1] - MPIPos[start+i]; char* buf4 = new char[length]; - memcpy(buf4, outputString.c_str(), length); + //memcpy(buf4, outputString.c_str(), length); MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status); @@ -678,9 +627,11 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align delete buf4; if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); } - istringstream iss (tempBuf,istringstream::in); + istringstream iss (tempBuf,istringstream::in); + Sequence* candidateSeq = new Sequence(iss); + int origNumBases = candidateSeq->getNumBases(); string originalUnaligned = candidateSeq->getUnaligned(); int numBasesNeeded = origNumBases * threshold; @@ -802,7 +753,7 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s try { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) int process = 0; - int exitCommand = 1; + int num = 0; // processIDS.resize(0); //loop through and create all the processes you want @@ -813,7 +764,15 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - exitCommand = driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp", accnosFName + toString(getpid()) + ".temp", filename); + num = driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp", accnosFName + toString(getpid()) + ".temp", filename); + + //pass numSeqs to parent + ofstream out; + string tempFile = alignFileName + toString(getpid()) + ".num.temp"; + openOutputFile(tempFile, out); + out << num << endl; + out.close(); + exit(0); }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } } @@ -824,7 +783,15 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s wait(&temp); } - return exitCommand; + for (int i = 0; i < processIDS.size(); i++) { + ifstream in; + string tempFile = alignFileName + toString(processIDS[i]) + ".num.temp"; + openInputFile(tempFile, in); + if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; } + in.close(); remove(tempFile.c_str()); + } + + return num; #endif } catch(exception& e) { @@ -832,7 +799,6 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s exit(1); } } - /**************************************************************************************************/ void AlignCommand::appendAlignFiles(string temp, string filename) {