X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=aligncommand.cpp;h=5aabeef9437bd9f5fdd90f6b785e52065afc3115;hb=b20d7d9b1b0eeeaedb78d8fdf26833fd212d2e81;hp=f19810e131b687ea372ae1914ecf3a02d03ef8d8;hpb=b4f80c1d2be78a8743a408a2b6d462b07f9f71ff;p=mothur.git diff --git a/aligncommand.cpp b/aligncommand.cpp index f19810e..5aabeef 100644 --- a/aligncommand.cpp +++ b/aligncommand.cpp @@ -68,7 +68,7 @@ AlignCommand::AlignCommand(string option) { //user has given a template file if(it != parameters.end()){ - path = hasPath(it->second); + path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["template"] = inputDir + it->second; } } @@ -86,49 +86,33 @@ AlignCommand::AlignCommand(string option) { candidateFileName = validParameter.validFile(parameters, "candidate", false); if (candidateFileName == "not found") { m->mothurOut("candidate is a required parameter for the align.seqs command."); m->mothurOutEndLine(); abort = true; } else { - splitAtDash(candidateFileName, candidateFileNames); + m->splitAtDash(candidateFileName, candidateFileNames); //go through files and make sure they are good, if not, then disregard them for (int i = 0; i < candidateFileNames.size(); i++) { + //candidateFileNames[i] = m->getFullPathName(candidateFileNames[i]); + if (inputDir != "") { - string path = hasPath(candidateFileNames[i]); + string path = m->hasPath(candidateFileNames[i]); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { candidateFileNames[i] = inputDir + candidateFileNames[i]; } } int ableToOpen; ifstream in; - - #ifdef USE_MPI - int pid; - MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running - MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - - if (pid == 0) { - #endif - ableToOpen = openInputFile(candidateFileNames[i], in, "noerror"); + ableToOpen = m->openInputFile(candidateFileNames[i], in, "noerror"); //if you can't open it, try default location if (ableToOpen == 1) { if (m->getDefaultPath() != "") { //default path is set - string tryPath = m->getDefaultPath() + getSimpleName(candidateFileNames[i]); + string tryPath = m->getDefaultPath() + m->getSimpleName(candidateFileNames[i]); m->mothurOut("Unable to open " + candidateFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); - ableToOpen = openInputFile(tryPath, in, "noerror"); + ableToOpen = m->openInputFile(tryPath, in, "noerror"); candidateFileNames[i] = tryPath; } } in.close(); - #ifdef USE_MPI - for (int j = 1; j < processors; j++) { - MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); - } - }else{ - MPI_Status status; - MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); - } - - #endif if (ableToOpen == 1) { m->mothurOut("Unable to open " + candidateFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); @@ -165,7 +149,7 @@ AlignCommand::AlignCommand(string option) { convert(temp, processors); temp = validParameter.validFile(parameters, "flip", false); if (temp == "not found"){ temp = "f"; } - flip = isTrue(temp); + flip = m->isTrue(temp); temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found"){ temp = "0.50"; } convert(temp, threshold); @@ -248,10 +232,10 @@ int AlignCommand::execute(){ m->mothurOut("Aligning sequences from " + candidateFileNames[s] + " ..." ); m->mothurOutEndLine(); - if (outputDir == "") { outputDir += hasPath(candidateFileNames[s]); } - string alignFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "align"; - string reportFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "align.report"; - string accnosFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "flip.accnos"; + if (outputDir == "") { outputDir += m->hasPath(candidateFileNames[s]); } + string alignFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + "align"; + string reportFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + "align.report"; + string accnosFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + "flip.accnos"; bool hasAccnos = true; int numFastaSeqs = 0; @@ -263,7 +247,7 @@ int AlignCommand::execute(){ int tag = 2001; vector MPIPos; MPIWroteAccnos = false; - + MPI_Status status; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are MPI_Comm_size(MPI_COMM_WORLD, &processors); @@ -297,7 +281,7 @@ int AlignCommand::execute(){ if (pid == 0) { //you are the root process - MPIPos = setFilePosFasta(candidateFileNames[s], numFastaSeqs); //fills MPIPos, returns numSeqs + MPIPos = m->setFilePosFasta(candidateFileNames[s], numFastaSeqs); //fills MPIPos, returns numSeqs //send file positions to all processes for(int i = 1; i < processors; i++) { @@ -310,7 +294,6 @@ int AlignCommand::execute(){ int startIndex = pid * numSeqsPerProcessor; if(pid == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor; } - //align your part driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos); @@ -365,27 +348,19 @@ int AlignCommand::execute(){ } #else - + + vector positions = m->divideFile(candidateFileNames[s], processors); + for (int i = 0; i < (positions.size()-1); i++) { + lines.push_back(new linePair(positions[i], positions[(i+1)])); + } #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) if(processors == 1){ - ifstream inFASTA; - openInputFile(candidateFileNames[s], inFASTA); - getNumSeqs(inFASTA, numFastaSeqs); - inFASTA.close(); - - lines.push_back(new linePair(0, numFastaSeqs)); + numFastaSeqs = driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); - driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); - - if (m->control_pressed) { - remove(accnosFileName.c_str()); - remove(alignFileName.c_str()); - remove(reportFileName.c_str()); - return 0; - } + if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); return 0; } //delete accnos file if its blank else report to user - if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } + if (m->isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } else { m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + "."); if (!flip) { @@ -393,36 +368,10 @@ int AlignCommand::execute(){ }else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); } m->mothurOutEndLine(); } - } - else{ - vector positions; + }else{ processIDS.resize(0); - ifstream inFASTA; - openInputFile(candidateFileNames[s], inFASTA); - - string input; - while(!inFASTA.eof()){ - input = getline(inFASTA); - if (input.length() != 0) { - if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } - } - } - inFASTA.close(); - - numFastaSeqs = positions.size(); - - int numSeqsPerProcessor = numFastaSeqs / processors; - - for (int i = 0; i < processors; i++) { - unsigned long int startPos = positions[ i * numSeqsPerProcessor ]; - if(i == processors - 1){ - numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; - } - lines.push_back(new linePair(startPos, numSeqsPerProcessor)); - } - - createProcesses(alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); + numFastaSeqs = createProcesses(alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); rename((alignFileName + toString(processIDS[0]) + ".temp").c_str(), alignFileName.c_str()); rename((reportFileName + toString(processIDS[0]) + ".temp").c_str(), reportFileName.c_str()); @@ -439,7 +388,7 @@ int AlignCommand::execute(){ vector nonBlankAccnosFiles; //delete blank accnos files generated with multiple processes for(int i=0;iisBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) { nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp"); }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str()); } } @@ -459,32 +408,15 @@ int AlignCommand::execute(){ m->mothurOutEndLine(); }else{ hasAccnos = false; } - if (m->control_pressed) { - remove(accnosFileName.c_str()); - remove(alignFileName.c_str()); - remove(reportFileName.c_str()); - return 0; - } + if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); return 0; } } #else - ifstream inFASTA; - openInputFile(candidateFileNames[s], inFASTA); - getNumSeqs(inFASTA, numFastaSeqs); - inFASTA.close(); - - lines.push_back(new linePair(0, numFastaSeqs)); + numFastaSeqs = driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); - driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); - - if (m->control_pressed) { - remove(accnosFileName.c_str()); - remove(alignFileName.c_str()); - remove(reportFileName.c_str()); - return 0; - } + if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); return 0; } //delete accnos file if its blank else report to user - if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } + if (m->isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } else { m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + "."); if (!flip) { @@ -533,26 +465,29 @@ int AlignCommand::execute(){ //********************************************************************************************************************** -int AlignCommand::driver(linePair* line, string alignFName, string reportFName, string accnosFName, string filename){ +int AlignCommand::driver(linePair* filePos, string alignFName, string reportFName, string accnosFName, string filename){ try { ofstream alignmentFile; - openOutputFile(alignFName, alignmentFile); + m->openOutputFile(alignFName, alignmentFile); ofstream accnosFile; - openOutputFile(accnosFName, accnosFile); + m->openOutputFile(accnosFName, accnosFile); NastReport report(reportFName); ifstream inFASTA; - openInputFile(filename, inFASTA); + m->openInputFile(filename, inFASTA); + + inFASTA.seekg(filePos->start); - inFASTA.seekg(line->start); + bool done = false; + int count = 0; - for(int i=0;inumSeqs;i++){ + while (!done) { if (m->control_pressed) { return 0; } - Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA); + Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA); int origNumBases = candidateSeq->getNumBases(); string originalUnaligned = candidateSeq->getUnaligned(); @@ -580,7 +515,7 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName, //if there is a possibility that this sequence should be reversed if (candidateSeq->getNumBases() < numBasesNeeded) { - string wasBetter = ""; + string wasBetter = ""; //if the user wants you to try the reverse if (flip) { //get reverse compliment @@ -602,8 +537,9 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName, delete nast; nast = nast2; needToDeleteCopy = true; + wasBetter = "\treverse complement produced a better alignment, so mothur used the reverse complement."; }else{ - wasBetter = "\treverse complement did NOT produce a better alignment, please check sequence."; + wasBetter = "\treverse complement did NOT produce a better alignment so it was not used, please check sequence."; delete nast2; delete copy; } @@ -624,20 +560,26 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName, report.print(); delete nast; if (needToDeleteCopy) { delete copy; } + + count++; } delete candidateSeq; + unsigned long int pos = inFASTA.tellg(); + if ((pos == -1) || (pos >= filePos->end)) { break; } + //report progress - if((i+1) % 100 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine(); } + if((count) % 100 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } + } //report progress - if((line->numSeqs) % 100 != 0){ m->mothurOut(toString(line->numSeqs)); m->mothurOutEndLine(); } + if((count) % 100 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } alignmentFile.close(); inFASTA.close(); accnosFile.close(); - return 1; + return count; } catch(exception& e) { m->errorOut(e, "AlignCommand", "driver"); @@ -678,7 +620,7 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align int length = MPIPos[start+i+1] - MPIPos[start+i]; char* buf4 = new char[length]; - memcpy(buf4, outputString.c_str(), length); + //memcpy(buf4, outputString.c_str(), length); MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status); @@ -687,9 +629,11 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align delete buf4; if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); } - istringstream iss (tempBuf,istringstream::in); + istringstream iss (tempBuf,istringstream::in); + Sequence* candidateSeq = new Sequence(iss); + int origNumBases = candidateSeq->getNumBases(); string originalUnaligned = candidateSeq->getUnaligned(); int numBasesNeeded = origNumBases * threshold; @@ -811,7 +755,7 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s try { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) int process = 0; - int exitCommand = 1; + int num = 0; // processIDS.resize(0); //loop through and create all the processes you want @@ -822,7 +766,15 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - exitCommand = driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp", accnosFName + toString(getpid()) + ".temp", filename); + num = driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp", accnosFName + toString(getpid()) + ".temp", filename); + + //pass numSeqs to parent + ofstream out; + string tempFile = alignFileName + toString(getpid()) + ".num.temp"; + m->openOutputFile(tempFile, out); + out << num << endl; + out.close(); + exit(0); }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } } @@ -833,7 +785,15 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s wait(&temp); } - return exitCommand; + for (int i = 0; i < processIDS.size(); i++) { + ifstream in; + string tempFile = alignFileName + toString(processIDS[i]) + ".num.temp"; + m->openInputFile(tempFile, in); + if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; } + in.close(); remove(tempFile.c_str()); + } + + return num; #endif } catch(exception& e) { @@ -841,7 +801,6 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s exit(1); } } - /**************************************************************************************************/ void AlignCommand::appendAlignFiles(string temp, string filename) { @@ -849,8 +808,8 @@ void AlignCommand::appendAlignFiles(string temp, string filename) { ofstream output; ifstream input; - openOutputFileAppend(filename, output); - openInputFile(temp, input); + m->openOutputFileAppend(filename, output); + m->openInputFile(temp, input); while(char c = input.get()){ if(input.eof()) { break; } @@ -872,8 +831,8 @@ void AlignCommand::appendReportFiles(string temp, string filename) { ofstream output; ifstream input; - openOutputFileAppend(filename, output); - openInputFile(temp, input); + m->openOutputFileAppend(filename, output); + m->openInputFile(temp, input); while (!input.eof()) { char c = input.get(); if (c == 10 || c == 13){ break; } } // get header line