X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=shhhercommand.cpp;h=8ae76d9836c31b0d3a37ac41d58a6aa54db6e859;hb=2c97dd48b8e27ee0a6a86c7a082f4c504c3357c6;hp=5bcd7d843a18829793afcc9660af7b7667385c4b;hpb=ffc44592ff7ae94f14f9e21f87198e33d323cd1d;p=mothur.git diff --git a/shhhercommand.cpp b/shhhercommand.cpp index 5bcd7d8..8ae76d9 100644 --- a/shhhercommand.cpp +++ b/shhhercommand.cpp @@ -150,14 +150,21 @@ ShhherCommand::ShhherCommand(string option) { else{ ofstream temp; - string thisoutputDir = m->hasPath(flowFilesFileName); //if user entered a file with a path then preserve it + string thisoutputDir = outputDir; + if (outputDir == "") { thisoutputDir = m->hasPath(flowFilesFileName); } //if user entered a file with a path then preserve it - //flow.files = 9 character offset - compositeFASTAFileName = thisoutputDir + m->getRootName(m->getSimpleName(flowFilesFileName)) + "shhh.fasta"; + //we want to rip off .files, and also .flow if its there + string fileroot = m->getRootName(m->getSimpleName(flowFilesFileName)); + if (fileroot[fileroot.length()-1] == '.') { fileroot = fileroot.substr(0, fileroot.length()-1); } //rip off dot + string extension = m->getExtension(fileroot); + if (extension == ".flow") { fileroot = m->getRootName(fileroot); } + else { fileroot += "."; } //add back if needed + + compositeFASTAFileName = thisoutputDir + fileroot + "shhh.fasta"; m->openOutputFile(compositeFASTAFileName, temp); temp.close(); - compositeNamesFileName = thisoutputDir + m->getRootName(m->getSimpleName(flowFilesFileName)) + "shhh.names"; + compositeNamesFileName = thisoutputDir + fileroot + "shhh.names"; m->openOutputFile(compositeNamesFileName, temp); temp.close(); } @@ -931,6 +938,8 @@ void ShhherCommand::initPyroCluster(){ try{ if (numOTUs < processors) { processors = 1; } + if (m->debug) { m->mothurOut("[DEBUG]: numSeqs = " + toString(numSeqs) + " numOTUS = " + toString(numOTUs) + " about to alloc a dist vector with size = " + toString((numSeqs * numOTUs)) + ".\n"); } + dist.assign(numSeqs * numOTUs, 0); change.assign(numOTUs, 1); centroids.assign(numOTUs, -1); @@ -940,6 +949,8 @@ void ShhherCommand::initPyroCluster(){ nSeqsBreaks.assign(processors+1, 0); nOTUsBreaks.assign(processors+1, 0); + if (m->debug) { m->mothurOut("[DEBUG]: made it through the memory allocation.\n"); } + nSeqsBreaks[0] = 0; for(int i=0;i filenames){ //divide the groups between the processors vector lines; + vector numFilesToComplete; int numFilesPerProcessor = filenames.size() / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numFilesPerProcessor; int endIndex = (i+1) * numFilesPerProcessor; if(i == (processors - 1)){ endIndex = filenames.size(); } lines.push_back(linePair(startIndex, endIndex)); + numFilesToComplete.push_back((endIndex-startIndex)); } #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) @@ -1946,6 +1959,14 @@ int ShhherCommand::createProcesses(vector filenames){ process++; }else if (pid == 0){ num = driver(filenames, compositeFASTAFileName + toString(getpid()) + ".temp", compositeNamesFileName + toString(getpid()) + ".temp", lines[process].start, lines[process].end); + + //pass numSeqs to parent + ofstream out; + string tempFile = compositeFASTAFileName + toString(getpid()) + ".num.temp"; + m->openOutputFile(tempFile, out); + out << num << endl; + out.close(); + exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); @@ -2008,6 +2029,18 @@ int ShhherCommand::createProcesses(vector filenames){ #endif for (int i=0;iopenInputFile(tempFile, in); + if (!in.eof()) { + int tempNum = 0; + in >> tempNum; + if (tempNum != numFilesToComplete[i+1]) { + m->mothurOut("[ERROR]: main process expected " + toString(processIDS[i]) + " to complete " + toString(numFilesToComplete[i+1]) + " files, and it only reported completing " + toString(tempNum) + ". This will cause file mismatches. The flow files may be too large to process with multiple processors. \n"); + } + } + in.close(); m->mothurRemove(tempFile); + if (compositeFASTAFileName != "") { m->appendFiles((compositeFASTAFileName + toString(processIDS[i]) + ".temp"), compositeFASTAFileName); m->appendFiles((compositeNamesFileName + toString(processIDS[i]) + ".temp"), compositeNamesFileName); @@ -2076,6 +2109,8 @@ vector ShhherCommand::parseFlowFiles(string filename){ int ShhherCommand::driver(vector filenames, string thisCompositeFASTAFileName, string thisCompositeNamesFileName, int start, int end){ try { + int numCompleted = 0; + for(int i=start;icontrol_pressed) { break; } @@ -2106,6 +2141,7 @@ int ShhherCommand::driver(vector filenames, string thisCompositeFASTAFil vector uniqueLengths; int numFlowCells; + if (m->debug) { m->mothurOut("[DEBUG]: About to read flowgrams.\n"); } int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells); if (m->control_pressed) { break; } @@ -2162,6 +2198,8 @@ int ShhherCommand::driver(vector filenames, string thisCompositeFASTAFil vector nSeqsBreaks; vector nOTUsBreaks; + if (m->debug) { m->mothurOut("[DEBUG]: numSeqs = " + toString(numSeqs) + " numOTUS = " + toString(numOTUs) + " about to alloc a dist vector with size = " + toString((numSeqs * numOTUs)) + ".\n"); } + dist.assign(numSeqs * numOTUs, 0); change.assign(numOTUs, 1); centroids.assign(numOTUs, -1); @@ -2175,6 +2213,8 @@ int ShhherCommand::driver(vector filenames, string thisCompositeFASTAFil nSeqsBreaks[1] = numSeqs; nOTUsBreaks[1] = numOTUs; + if (m->debug) { m->mothurOut("[DEBUG]: done allocating memory, about to denoise.\n"); } + if (m->control_pressed) { break; } double maxDelta = 0; @@ -2274,12 +2314,13 @@ int ShhherCommand::driver(vector filenames, string thisCompositeFASTAFil } } + numCompleted++; m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n'); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } - return 0; + return numCompleted; }catch(exception& e) { m->errorOut(e, "ShhherCommand", "driver"); @@ -2304,17 +2345,21 @@ int ShhherCommand::getFlowData(string filename, vector& thisSeqNameVecto thisNameMap.clear(); flowFile >> numFlowCells; + if (m->debug) { m->mothurOut("[DEBUG]: numFlowCells = " + toString(numFlowCells) + ".\n"); } int index = 0;//pcluster while(!flowFile.eof()){ if (m->control_pressed) { break; } flowFile >> seqName >> currentNumFlowCells; + thisLengths.push_back(currentNumFlowCells); thisSeqNameVector.push_back(seqName); thisNameMap[seqName] = index++;//pcluster - + + if (m->debug) { m->mothurOut("[DEBUG]: seqName = " + seqName + " length = " + toString(currentNumFlowCells) + " index = " + toString(index) + "\n"); } + for(int i=0;i> intensity; if(intensity > 9.99) { intensity = 9.99; } @@ -2602,6 +2647,8 @@ int ShhherCommand::getOTUData(int numSeqs, string fileName, vector& otuDat listFile >> label >> numOTUs; + if (m->debug) { m->mothurOut("[DEBUG]: Getting OTU Data...\n"); } + otuData.assign(numSeqs, 0); cumNumSeqs.assign(numOTUs, 0); nSeqsPerOTU.assign(numOTUs, 0); @@ -2616,6 +2663,7 @@ int ShhherCommand::getOTUData(int numSeqs, string fileName, vector& otuDat for(int i=0;icontrol_pressed) { break; } + if (m->debug) { m->mothurOut("[DEBUG]: processing OTU " + toString(i) + ".\n"); } listFile >> singleOTU;