X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=shhhercommand.cpp;h=97629b600031c06c063f405175dd9fa369ae7c93;hb=2ecee16fec29d4c525f740ec19b27962ca09c050;hp=08bb017a4d025113c7317167e3eb6217fa57638d;hpb=36a6b02cf7f09d2bc34376b588944a9ca73429c5;p=mothur.git diff --git a/shhhercommand.cpp b/shhhercommand.cpp index 08bb017..97629b6 100644 --- a/shhhercommand.cpp +++ b/shhhercommand.cpp @@ -47,6 +47,30 @@ string ShhherCommand::getHelpString(){ } } //********************************************************************************************************************** +string ShhherCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "fasta") { outputFileName = "shhh.fasta"; } + else if (type == "name") { outputFileName = "shhh.names"; } + else if (type == "group") { outputFileName = "shhh.groups"; } + else if (type == "counts") { outputFileName = "shhh.counts"; } + else if (type == "qfile") { outputFileName = "shhh.qual"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "ShhherCommand", "getOutputFileNameTag"); + exit(1); + } +} +//********************************************************************************************************************** ShhherCommand::ShhherCommand(){ try { @@ -54,8 +78,12 @@ ShhherCommand::ShhherCommand(){ setParameters(); //initialize outputTypes -// vector tempOutNames; -// outputTypes["pn.dist"] = tempOutNames; + vector tempOutNames; + outputTypes["fasta"] = tempOutNames; + outputTypes["name"] = tempOutNames; + outputTypes["group"] = tempOutNames; + outputTypes["counts"] = tempOutNames; + outputTypes["qfile"] = tempOutNames; } catch(exception& e) { @@ -96,9 +124,13 @@ ShhherCommand::ShhherCommand(string option) { } //initialize outputTypes - vector tempOutNames; -// outputTypes["pn.dist"] = tempOutNames; - // outputTypes["fasta"] = tempOutNames; + vector tempOutNames; + outputTypes["fasta"] = tempOutNames; + outputTypes["name"] = tempOutNames; + outputTypes["group"] = tempOutNames; + outputTypes["counts"] = tempOutNames; + outputTypes["qfile"] = tempOutNames; + //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); @@ -770,7 +802,7 @@ string ShhherCommand::createNamesFile(){ duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ','; } - string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names"; + string nameFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName) + getOutputFileNameTag("name"); ofstream nameFile; m->openOutputFile(nameFileName, nameFile); @@ -938,6 +970,8 @@ void ShhherCommand::initPyroCluster(){ try{ if (numOTUs < processors) { processors = 1; } + if (m->debug) { m->mothurOut("[DEBUG]: numSeqs = " + toString(numSeqs) + " numOTUS = " + toString(numOTUs) + " about to alloc a dist vector with size = " + toString((numSeqs * numOTUs)) + ".\n"); } + dist.assign(numSeqs * numOTUs, 0); change.assign(numOTUs, 1); centroids.assign(numOTUs, -1); @@ -947,6 +981,8 @@ void ShhherCommand::initPyroCluster(){ nSeqsBreaks.assign(processors+1, 0); nOTUsBreaks.assign(processors+1, 0); + if (m->debug) { m->mothurOut("[DEBUG]: made it through the memory allocation.\n"); } + nSeqsBreaks[0] = 0; for(int i=0;i otuCounts){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); } - string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.qual"; + string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("qfile"); ofstream qualityFile; m->openOutputFile(qualityFileName, qualityFile); @@ -1717,7 +1753,7 @@ void ShhherCommand::writeSequences(vector otuCounts){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); } - string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.fasta"; + string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("fasta"); ofstream fastaFile; m->openOutputFile(fastaFileName, fastaFile); @@ -1765,7 +1801,7 @@ void ShhherCommand::writeNames(vector otuCounts){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); } - string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.names"; + string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("name"); ofstream nameFile; m->openOutputFile(nameFileName, nameFile); @@ -1804,7 +1840,7 @@ void ShhherCommand::writeGroups(){ string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); } string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)); - string groupFileName = fileRoot + "shhh.groups"; + string groupFileName = fileRoot + getOutputFileNameTag("group"); ofstream groupFile; m->openOutputFile(groupFileName, groupFile); @@ -1828,7 +1864,7 @@ void ShhherCommand::writeClusters(vector otuCounts){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); } - string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.counts"; + string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) +getOutputFileNameTag("counts"); ofstream otuCountsFile; m->openOutputFile(otuCountsFileName, otuCountsFile); @@ -2137,6 +2173,7 @@ int ShhherCommand::driver(vector filenames, string thisCompositeFASTAFil vector uniqueLengths; int numFlowCells; + if (m->debug) { m->mothurOut("[DEBUG]: About to read flowgrams.\n"); } int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells); if (m->control_pressed) { break; } @@ -2193,6 +2230,8 @@ int ShhherCommand::driver(vector filenames, string thisCompositeFASTAFil vector nSeqsBreaks; vector nOTUsBreaks; + if (m->debug) { m->mothurOut("[DEBUG]: numSeqs = " + toString(numSeqs) + " numOTUS = " + toString(numOTUs) + " about to alloc a dist vector with size = " + toString((numSeqs * numOTUs)) + ".\n"); } + dist.assign(numSeqs * numOTUs, 0); change.assign(numOTUs, 1); centroids.assign(numOTUs, -1); @@ -2206,6 +2245,8 @@ int ShhherCommand::driver(vector filenames, string thisCompositeFASTAFil nSeqsBreaks[1] = numSeqs; nOTUsBreaks[1] = numOTUs; + if (m->debug) { m->mothurOut("[DEBUG]: done allocating memory, about to denoise.\n"); } + if (m->control_pressed) { break; } double maxDelta = 0; @@ -2274,12 +2315,12 @@ int ShhherCommand::driver(vector filenames, string thisCompositeFASTAFil if ((large) && (g == 0)) { flowFileName = filenames[i]; theseFlowFileNames[0] = filenames[i]; } string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir = m->hasPath(flowFileName); } - string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.qual"; - string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.fasta"; - string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.names"; - string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.counts"; + string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("qfile"); + string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("fasta"); + string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("name"); + string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("counts"); string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)); - string groupFileName = fileRoot + "shhh.groups"; + string groupFileName = fileRoot + getOutputFileNameTag("group"); writeQualities(numOTUs, numFlowCells, qualityFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); if (m->control_pressed) { break; } @@ -2290,15 +2331,15 @@ int ShhherCommand::driver(vector filenames, string thisCompositeFASTAFil if (large) { if (g > 0) { - m->appendFiles(qualityFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.qual")); + m->appendFiles(qualityFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + getOutputFileNameTag("qfile"))); m->mothurRemove(qualityFileName); - m->appendFiles(fastaFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.fasta")); + m->appendFiles(fastaFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + getOutputFileNameTag("fasta"))); m->mothurRemove(fastaFileName); - m->appendFiles(nameFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.names")); + m->appendFiles(nameFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + getOutputFileNameTag("name"))); m->mothurRemove(nameFileName); - m->appendFiles(otuCountsFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.counts")); + m->appendFiles(otuCountsFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + getOutputFileNameTag("counts"))); m->mothurRemove(otuCountsFileName); - m->appendFiles(groupFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.groups")); + m->appendFiles(groupFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + getOutputFileNameTag("group"))); m->mothurRemove(groupFileName); } m->mothurRemove(theseFlowFileNames[g]); @@ -2336,17 +2377,21 @@ int ShhherCommand::getFlowData(string filename, vector& thisSeqNameVecto thisNameMap.clear(); flowFile >> numFlowCells; + if (m->debug) { m->mothurOut("[DEBUG]: numFlowCells = " + toString(numFlowCells) + ".\n"); } int index = 0;//pcluster while(!flowFile.eof()){ if (m->control_pressed) { break; } flowFile >> seqName >> currentNumFlowCells; + thisLengths.push_back(currentNumFlowCells); thisSeqNameVector.push_back(seqName); thisNameMap[seqName] = index++;//pcluster - + + if (m->debug) { m->mothurOut("[DEBUG]: seqName = " + seqName + " length = " + toString(currentNumFlowCells) + " index = " + toString(index) + "\n"); } + for(int i=0;i> intensity; if(intensity > 9.99) { intensity = 9.99; } @@ -2634,6 +2679,8 @@ int ShhherCommand::getOTUData(int numSeqs, string fileName, vector& otuDat listFile >> label >> numOTUs; + if (m->debug) { m->mothurOut("[DEBUG]: Getting OTU Data...\n"); } + otuData.assign(numSeqs, 0); cumNumSeqs.assign(numOTUs, 0); nSeqsPerOTU.assign(numOTUs, 0); @@ -2648,6 +2695,7 @@ int ShhherCommand::getOTUData(int numSeqs, string fileName, vector& otuDat for(int i=0;icontrol_pressed) { break; } + if (m->debug) { m->mothurOut("[DEBUG]: processing OTU " + toString(i) + ".\n"); } listFile >> singleOTU;