X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=shhhercommand.cpp;h=6a19618b503d13157a1fdea8ae5a234c019fe9af;hb=c67cf4168e1a124088b6f017946f0aa1fbdf1301;hp=224d3ce77cde444c07c0d1ebc393e270228fa620;hpb=136158a07155d3484f9318b553c38e57405f9a3d;p=mothur.git diff --git a/shhhercommand.cpp b/shhhercommand.cpp index 224d3ce..6a19618 100644 --- a/shhhercommand.cpp +++ b/shhhercommand.cpp @@ -16,6 +16,7 @@ #include "listvector.hpp" #include "cluster.hpp" #include "sparsematrix.hpp" +#include //********************************************************************************************************************** @@ -47,7 +48,7 @@ vector ShhherCommand::getValidParameters(){ ShhherCommand::ShhherCommand(){ try { - abort = true; + abort = true; calledHelp = true; //initialize outputTypes vector tempOutNames; @@ -100,11 +101,11 @@ ShhherCommand::ShhherCommand(string option) { #endif - abort = false; + abort = false; calledHelp = false; //allow user to run help - if(option == "help") { help(); abort = true; } + if(option == "help") { help(); abort = true; calledHelp = true; } else { @@ -170,7 +171,15 @@ ShhherCommand::ShhherCommand(string option) { m->mothurOutEndLine(); abort = true; } - else if (flowFileName == "not open" || flowFilesFileName == "not open") { abort = true; } + else if (flowFileName == "not open" || flowFilesFileName == "not open") { abort = true; } + + if(flowFileName != "not found"){ compositeFASTAFileName = ""; } + else{ + compositeFASTAFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "pn.fasta"; + ofstream temp; + m->openOutputFile(compositeFASTAFileName, temp); + temp.close(); + } //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ @@ -183,7 +192,7 @@ ShhherCommand::ShhherCommand(string option) { // ...at some point should added some additional type checking... string temp; temp = validParameter.validFile(parameters, "lookup", true); - if (temp == "not found") { lookupFileName = "LookUp_E123.pat"; } + if (temp == "not found") { lookupFileName = "LookUp_Titanium.pat"; } else if(temp == "not open") { abort = true; } else { lookupFileName = temp; } @@ -236,16 +245,13 @@ void ShhherCommand::help(){ #ifdef USE_MPI int ShhherCommand::execute(){ try { + if (abort == true) { if (calledHelp) { return 0; } return 2; } + int tag = 1976; MPI_Status status; double begClock = clock(); unsigned long int begTime = time(NULL); - - cout.setf(ios::fixed, ios::floatfield); - cout.setf(ios::showpoint); - cout << setprecision(2); - if(pid == 0){ @@ -256,10 +262,10 @@ int ShhherCommand::execute(){ processors = ncpus; - cout << "\nGetting preliminary data..." << endl; + m->mothurOut("\nGetting preliminary data...\n"); getSingleLookUp(); getJointLookUp(); - + vector flowFileVector; if(flowFilesFileName != "not found"){ string fName; @@ -284,13 +290,14 @@ int ShhherCommand::execute(){ for(int i=0;i>>>>\tProcessing " << flowFileName << " (file " << i+1 << " of " << numFiles << ")\t<<<<<" << endl; - cout << "Reading flowgrams..." << endl; + m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n"); + m->mothurOut("Reading flowgrams...\n"); + getFlowData(); - cout << "Identifying unique flowgrams..." << endl; + m->mothurOut("Identifying unique flowgrams...\n"); getUniques(); - cout << "Calculating distances between flowgrams..." << endl; + m->mothurOut("Calculating distances between flowgrams...\n"); char fileName[1024]; strcpy(fileName, flowFileName.c_str()); @@ -321,11 +328,9 @@ int ShhherCommand::execute(){ string namesFileName = createNamesFile(); - cout << "\nClustering flowgrams..." << endl; + m->mothurOut("\nClustering flowgrams...\n"); string listFileName = cluster(distFileName, namesFileName); - // string listFileName = "PriestPot_C7.pn.list"; - // string listFileName = "test.mock_rep3.v69.pn.list"; - + getOTUData(listFileName); initPyroCluster(); @@ -342,9 +347,8 @@ int ShhherCommand::execute(){ int numOTUsOnCPU = numOTUs / ncpus; int numSeqsOnCPU = numSeqs / ncpus; - - cout << "\nDenoising flowgrams..." << endl; - cout << "iter\tmaxDelta\tnLL\t\tcycletime" << endl; + m->mothurOut("\nDenoising flowgrams...\n"); + m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n"); while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){ @@ -445,7 +449,7 @@ int ShhherCommand::execute(){ iter++; - cout << iter << '\t' << maxDelta << '\t' << setprecision(2) << nLL << '\t' << time(NULL) - cycTime << '\t' << setprecision(6) << (clock() - cycClock)/(double)CLOCKS_PER_SEC << endl; + m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n'); if((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){ int live = 1; @@ -462,7 +466,7 @@ int ShhherCommand::execute(){ } - cout << "\nFinalizing..." << endl; + m->mothurOut("\nFinalizing...\n"); fill(); setOTUs(); vector otuCounts(numOTUs, 0); @@ -477,10 +481,9 @@ int ShhherCommand::execute(){ remove(distFileName.c_str()); remove(namesFileName.c_str()); remove(listFileName.c_str()); - - cout << "Total time to process " << flowFileName << ":\t" << time(NULL) - begTime << '\t' << setprecision(6) << (clock() - begClock)/(double)CLOCKS_PER_SEC << endl; + + m->mothurOut("Total time to process " + toString(flowFileName) + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n'); } - } else{ int abort = 1; @@ -623,10 +626,11 @@ string ShhherCommand::flowDistMPI(int startSeq, int stopSeq){ } } if(i % 100 == 0){ - cout << i << "\t" << (time(NULL) - begTime) << "\t" << (clock()-begClock)/CLOCKS_PER_SEC << endl; + m->mothurOut(toString(i) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n'); } } - cout << stopSeq << "\t" << (time(NULL) - begTime) << "\t" << (clock()-begClock)/CLOCKS_PER_SEC << endl; + + m->mothurOut(toString(stopSeq) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n'); string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.dist"; if(pid != 0){ fDistFileName += ".temp." + toString(pid); } @@ -650,13 +654,9 @@ int ShhherCommand::execute(){ try { if (abort == true) { return 0; } - cout.setf(ios::fixed, ios::floatfield); - cout.setf(ios::showpoint); - getSingleLookUp(); getJointLookUp(); - - + vector flowFileVector; if(flowFilesFileName != "not found"){ string fName; @@ -678,18 +678,19 @@ int ShhherCommand::execute(){ for(int i=0;i>>>>\tProcessing " << flowFileName << " (file " << i+1 << " of " << numFiles << ")\t<<<<<" << endl; - cout << "Reading flowgrams..." << endl; + m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n"); + m->mothurOut("Reading flowgrams...\n"); getFlowData(); - cout << "Identifying unique flowgrams..." << endl; + + m->mothurOut("Identifying unique flowgrams...\n"); getUniques(); - cout << "Calculating distances between flowgrams..." << endl; + m->mothurOut("Calculating distances between flowgrams...\n"); string distFileName = createDistFile(processors); string namesFileName = createNamesFile(); - - cout << "\nClustering flowgrams..." << endl; + + m->mothurOut("\nClustering flowgrams...\n"); string listFileName = cluster(distFileName, namesFileName); getOTUData(listFileName); @@ -701,8 +702,8 @@ int ShhherCommand::execute(){ double begClock = clock(); unsigned long int begTime = time(NULL); - cout << "\nDenoising flowgrams..." << endl; - cout << "iter\tmaxDelta\tnLL\t\tcycletime" << endl; + m->mothurOut("\nDenoising flowgrams...\n"); + m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n"); while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){ @@ -720,10 +721,11 @@ int ShhherCommand::execute(){ iter++; - cout << iter << '\t' << maxDelta << '\t' << setprecision(2) << nLL << '\t' << time(NULL) - cycTime << '\t' << setprecision(6) << (clock() - cycClock)/(double)CLOCKS_PER_SEC << endl; + m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n'); + } - cout << "\nFinalizing..." << endl; + m->mothurOut("\nFinalizing...\n"); fill(); setOTUs(); @@ -741,7 +743,7 @@ int ShhherCommand::execute(){ remove(namesFileName.c_str()); remove(listFileName.c_str()); - cout << "Total time to process " << flowFileName << ":\t" << time(NULL) - begTime << '\t' << setprecision(6) << (clock() - begClock)/(double)CLOCKS_PER_SEC << endl; + m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n'); } return 0; } @@ -838,10 +840,10 @@ void ShhherCommand::getJointLookUp(){ for(int i=0;i current(numFlowCells); - for(int j=0;j uniqueLengths[j]) { uniqueLengths[j] = lengths[i]; } break; } index++; @@ -990,10 +1001,10 @@ void ShhherCommand::flowDistParentFork(string distFileName, int startSeq, int st float flowDistance = calcPairwiseDist(mapUniqueToSeq[i], mapUniqueToSeq[j]); if(flowDistance < 1e-6){ - outStream << seqNameVector[mapUniqueToSeq[i]] << '\t' << seqNameVector[mapUniqueToSeq[j]] << '\t' << 0.000000 << endl; + outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl; } else if(flowDistance <= cutoff){ - outStream << seqNameVector[mapUniqueToSeq[i]] << '\t' << seqNameVector[mapUniqueToSeq[j]] << '\t' << flowDistance << endl; + outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl; } } if(i % 100 == 0){ @@ -1086,7 +1097,8 @@ string ShhherCommand::createDistFile(int processors){ m->mothurOutEndLine(); - cout << "Total time: " << (time(NULL) - begTime) << "\t" << (clock() - begClock)/CLOCKS_PER_SEC << endl;; + m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n'); + return fDistFileName; } @@ -1131,9 +1143,6 @@ string ShhherCommand::createNamesFile(){ string ShhherCommand::cluster(string distFileName, string namesFileName){ try { - SparseMatrix* matrix; - ListVector* list; - RAbundVector* rabund; globaldata->setNameFile(namesFileName); globaldata->setColumnFile(distFileName); @@ -1146,13 +1155,13 @@ string ShhherCommand::cluster(string distFileName, string namesFileName){ clusterNameMap->readMap(); read->read(clusterNameMap); - list = read->getListVector(); - matrix = read->getMatrix(); + ListVector* list = read->getListVector(); + SparseMatrix* matrix = read->getMatrix(); delete read; delete clusterNameMap; - rabund = new RAbundVector(list->getRAbundVector()); + RAbundVector* rabund = new RAbundVector(list->getRAbundVector()); Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest"); string tag = cluster->getTag(); @@ -1160,7 +1169,6 @@ string ShhherCommand::cluster(string distFileName, string namesFileName){ double clusterCutoff = cutoff; while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){ cluster->update(clusterCutoff); - float dist = matrix->getSmallDist(); } list->setLabel(toString(cutoff)); @@ -1411,7 +1419,6 @@ void ShhherCommand::calcCentroidsDriver(int start, int finish){ for(int j=0;j P(numSeqs, 0); + vector P(numSeqs, 0); int effNumOTUs = 0; for(int i=0;i otuCounts){ vector > qualities(numOTUs); vector pr(HOMOPS, 0); - int index = 0; for(int i=0;i otuCounts){ void ShhherCommand::writeSequences(vector otuCounts){ try { - string bases = "TACG"; string fastaFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.fasta"; @@ -2016,6 +2022,10 @@ void ShhherCommand::writeSequences(vector otuCounts){ } } fastaFile.close(); + + if(compositeFASTAFileName != ""){ + m->appendFiles(fastaFileName, compositeFASTAFileName); + } } catch(exception& e) { m->errorOut(e, "ShhherCommand", "writeSequences");