X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=shhhercommand.cpp;h=f7c3e59714f4109398f97fb67d8dcb908a1ae9fd;hb=9c37bed33eaa0bb2d3c499085c0e165c0f00886b;hp=6833f5d4c1cae8f4ee0e3b6f3da3bd2474b31285;hpb=83bf613d5a7ffb4dcc75b3cad106d1eed1f9e498;p=mothur.git diff --git a/shhhercommand.cpp b/shhhercommand.cpp index 6833f5d..f7c3e59 100644 --- a/shhhercommand.cpp +++ b/shhhercommand.cpp @@ -26,64 +26,56 @@ #define MIN_WEIGHT 0.1 #define MIN_TAU 0.0001 #define MIN_ITER 10 - //********************************************************************************************************************** - -vector ShhherCommand::getValidParameters(){ +vector ShhherCommand::setParameters(){ try { - string Array[] = { - "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors", "maxiter", "mindelta" - }; + CommandParameter pflow("flow", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pflow); + CommandParameter pfile("file", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pfile); + CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plookup); + CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "",false,false); parameters.push_back(pcutoff); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); + CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "",false,false); parameters.push_back(pmaxiter); + CommandParameter psigma("sigma", "Number", "", "60", "", "", "",false,false); parameters.push_back(psigma); + CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "",false,false); parameters.push_back(pmindelta); + CommandParameter porder("order", "String", "", "", "", "", "",false,false); parameters.push_back(porder); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { - m->errorOut(e, "ShhherCommand", "getValidParameters"); + m->errorOut(e, "ShhherCommand", "setParameters"); exit(1); } } - //********************************************************************************************************************** - -ShhherCommand::ShhherCommand(){ +string ShhherCommand::getHelpString(){ try { - abort = true; calledHelp = true; - - //initialize outputTypes - vector tempOutNames; - outputTypes["pn.dist"] = tempOutNames; - + string helpString = ""; + helpString += "The shhh.seqs command reads a file containing flowgrams and creates a file of corrected sequences.\n"; + return helpString; } catch(exception& e) { - m->errorOut(e, "ShhherCommand", "ShhherCommand"); + m->errorOut(e, "ShhherCommand", "getHelpString"); exit(1); } } - //********************************************************************************************************************** -vector ShhherCommand::getRequiredParameters(){ +ShhherCommand::ShhherCommand(){ try { - string Array[] = {"flow"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); - return myArray; - } - catch(exception& e) { - m->errorOut(e, "ShhherCommand", "getRequiredParameters"); - exit(1); - } -} - -//********************************************************************************************************************** + abort = true; calledHelp = true; + setParameters(); + + //initialize outputTypes +// vector tempOutNames; +// outputTypes["pn.dist"] = tempOutNames; -vector ShhherCommand::getRequiredFiles(){ - try { - vector myArray; - return myArray; } catch(exception& e) { - m->errorOut(e, "ShhherCommand", "getRequiredFiles"); + m->errorOut(e, "ShhherCommand", "ShhherCommand"); exit(1); } } @@ -106,15 +98,10 @@ ShhherCommand::ShhherCommand(string option) { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} else { - - //valid paramters for this command - string AlignArray[] = { - "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors", "maxiter", "mindelta" - }; - - vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); + vector myArray = setParameters(); OptionParser parser(option); map parameters = parser.getParameters(); @@ -129,7 +116,7 @@ ShhherCommand::ShhherCommand(string option) { //initialize outputTypes vector tempOutNames; - outputTypes["pn.dist"] = tempOutNames; +// outputTypes["pn.dist"] = tempOutNames; // outputTypes["fasta"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter @@ -173,12 +160,20 @@ ShhherCommand::ShhherCommand(string option) { } else if (flowFileName == "not open" || flowFilesFileName == "not open") { abort = true; } - if(flowFileName != "not found"){ compositeFASTAFileName = ""; } + if(flowFileName != "not found"){ + compositeFASTAFileName = ""; + compositeNamesFileName = ""; + } else{ - compositeFASTAFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "pn.fasta"; ofstream temp; + + compositeFASTAFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "shhh.fasta"; m->openOutputFile(compositeFASTAFileName, temp); temp.close(); + + compositeNamesFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "shhh.names"; + m->openOutputFile(compositeNamesFileName, temp); + temp.close(); } //if the user changes the output directory command factory will send this info to us in the output parameter @@ -196,8 +191,9 @@ ShhherCommand::ShhherCommand(string option) { else if(temp == "not open") { abort = true; } else { lookupFileName = temp; } - temp = validParameter.validFile(parameters, "processors", false);if (temp == "not found"){ temp = "1"; } - convert(temp, processors); + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } + m->setProcessors(temp); + convert(temp, processors); temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found"){ temp = "0.01"; } convert(temp, cutoff); @@ -211,7 +207,12 @@ ShhherCommand::ShhherCommand(string option) { temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found") { temp = "60"; } convert(temp, sigma); - globaldata = GlobalData::getInstance(); + flowOrder = validParameter.validFile(parameters, "order", false); + if (flowOrder == "not found"){ flowOrder = "TACG"; } + else if(flowOrder.length() != 4){ + m->mothurOut("The value of the order option must be four bases long\n"); + } + } #ifdef USE_MPI @@ -224,23 +225,6 @@ ShhherCommand::ShhherCommand(string option) { exit(1); } } - -//********************************************************************************************************************** - -ShhherCommand::~ShhherCommand(){} - -//********************************************************************************************************************** - -void ShhherCommand::help(){ - try { - m->mothurOut("The shhher command reads a file containing flowgrams and creates a file of corrected sequences.\n"); - } - catch(exception& e) { - m->errorOut(e, "ShhherCommand", "help"); - exit(1); - } -} - //********************************************************************************************************************** #ifdef USE_MPI int ShhherCommand::execute(){ @@ -250,9 +234,6 @@ int ShhherCommand::execute(){ int tag = 1976; MPI_Status status; - double begClock = clock(); - unsigned long int begTime = time(NULL); - if(pid == 0){ for(int i=1;imothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n"); m->mothurOut("Reading flowgrams...\n"); @@ -334,6 +316,11 @@ int ShhherCommand::execute(){ string listFileName = cluster(distFileName, namesFileName); getOTUData(listFileName); + + remove(distFileName.c_str()); + remove(namesFileName.c_str()); + remove(listFileName.c_str()); + initPyroCluster(); for(int i=1;imothurOut("\nFinalizing...\n"); fill(); setOTUs(); + vector otuCounts(numOTUs, 0); for(int i=0;imothurOut("Total time to process " + toString(flowFileName) + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n'); } } else{ int abort = 1; - bool live = 1; MPI_Recv(&abort, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); if(abort){ return 0; } @@ -499,6 +484,8 @@ int ShhherCommand::execute(){ for(int i=0;imothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + return 0; } @@ -634,7 +631,7 @@ string ShhherCommand::flowDistMPI(int startSeq, int stopSeq){ m->mothurOut(toString(stopSeq) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n'); - string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.dist"; + string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist"; if(pid != 0){ fDistFileName += ".temp." + toString(pid); } ofstream distFile(fDistFileName.c_str()); @@ -696,6 +693,10 @@ int ShhherCommand::execute(){ string listFileName = cluster(distFileName, namesFileName); getOTUData(listFileName); + remove(distFileName.c_str()); + remove(namesFileName.c_str()); + remove(listFileName.c_str()); + initPyroCluster(); double maxDelta = 0; @@ -704,7 +705,6 @@ int ShhherCommand::execute(){ double begClock = clock(); unsigned long int begTime = time(NULL); - m->mothurOut("\nDenoising flowgrams...\n"); m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n"); @@ -742,12 +742,19 @@ int ShhherCommand::execute(){ writeClusters(otuCounts); writeGroups(); - remove(distFileName.c_str()); - remove(namesFileName.c_str()); - remove(listFileName.c_str()); - m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n'); } + + if(compositeFASTAFileName != ""){ + outputNames.push_back(compositeFASTAFileName); + outputNames.push_back(compositeNamesFileName); + } + + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + return 0; } catch(exception& e) { @@ -1039,7 +1046,7 @@ void ShhherCommand::flowDistParentFork(string distFileName, int startSeq, int st string ShhherCommand::createDistFile(int processors){ try{ - string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.dist"; + string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist"; unsigned long int begTime = time(NULL); double begClock = clock(); @@ -1127,7 +1134,7 @@ string ShhherCommand::createNamesFile(){ duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ','; } - string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.names"; + string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names"; ofstream nameFile; m->openOutputFile(nameFileName, nameFile); @@ -1151,11 +1158,6 @@ string ShhherCommand::createNamesFile(){ string ShhherCommand::cluster(string distFileName, string namesFileName){ try { - - globaldata->setNameFile(namesFileName); - globaldata->setColumnFile(distFileName); - globaldata->setFormat("column"); - ReadMatrix* read = new ReadColumnMatrix(distFileName); read->setCutoff(cutoff); @@ -1181,7 +1183,7 @@ string ShhherCommand::cluster(string distFileName, string namesFileName){ list->setLabel(toString(cutoff)); - string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.list"; + string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list"; ofstream listFile; m->openOutputFile(listFileName, listFile); list->print(listFile); @@ -1912,7 +1914,7 @@ void ShhherCommand::setOTUs(){ void ShhherCommand::writeQualities(vector otuCounts){ try { - string qualityFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.qual"; + string qualityFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.qual"; ofstream qualityFile; m->openOutputFile(qualityFileName, qualityFile); @@ -2001,7 +2003,8 @@ void ShhherCommand::writeQualities(vector otuCounts){ } } qualityFile.close(); - + outputNames.push_back(qualityFileName); + } catch(exception& e) { m->errorOut(e, "ShhherCommand", "writeQualities"); @@ -2013,9 +2016,8 @@ void ShhherCommand::writeQualities(vector otuCounts){ void ShhherCommand::writeSequences(vector otuCounts){ try { - string bases = "TACG"; - string fastaFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.fasta"; + string fastaFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.fasta"; ofstream fastaFile; m->openOutputFile(fastaFileName, fastaFile); @@ -2027,18 +2029,23 @@ void ShhherCommand::writeSequences(vector otuCounts){ if(otuCounts[i] > 0){ fastaFile << '>' << seqNameVector[aaI[i][0]] << endl; - for(int j=8;jappendFiles(fastaFileName, compositeFASTAFileName); } @@ -2053,7 +2060,7 @@ void ShhherCommand::writeSequences(vector otuCounts){ void ShhherCommand::writeNames(vector otuCounts){ try { - string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.final.names"; + string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names"; ofstream nameFile; m->openOutputFile(nameFileName, nameFile); @@ -2069,6 +2076,12 @@ void ShhherCommand::writeNames(vector otuCounts){ } } nameFile.close(); + outputNames.push_back(nameFileName); + + + if(compositeNamesFileName != ""){ + m->appendFiles(nameFileName, compositeNamesFileName); + } } catch(exception& e) { m->errorOut(e, "ShhherCommand", "writeNames"); @@ -2081,7 +2094,7 @@ void ShhherCommand::writeNames(vector otuCounts){ void ShhherCommand::writeGroups(){ try { string fileRoot = flowFileName.substr(0,flowFileName.find_last_of('.')); - string groupFileName = fileRoot + ".pn.groups"; + string groupFileName = fileRoot + ".shhh.groups"; ofstream groupFile; m->openOutputFile(groupFileName, groupFile); @@ -2089,6 +2102,8 @@ void ShhherCommand::writeGroups(){ groupFile << seqNameVector[i] << '\t' << fileRoot << endl; } groupFile.close(); + outputNames.push_back(groupFileName); + } catch(exception& e) { m->errorOut(e, "ShhherCommand", "writeGroups"); @@ -2100,11 +2115,11 @@ void ShhherCommand::writeGroups(){ void ShhherCommand::writeClusters(vector otuCounts){ try { - string otuCountsFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.counts"; + string otuCountsFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.counts"; ofstream otuCountsFile; m->openOutputFile(otuCountsFileName, otuCountsFile); - string bases = "TACG"; + string bases = flowOrder; for(int i=0;i otuCounts){ int sequence = aaI[i][j]; otuCountsFile << seqNameVector[sequence] << '\t'; - for(int k=8;kerrorOut(e, "ShhherCommand", "writeClusters");