vector<string> ShhherCommand::getValidParameters(){
try {
string Array[] = {
- "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors"
+ "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors", "maxiter", "mindelta"
};
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
ShhherCommand::ShhherCommand(){
try {
- abort = true;
+ abort = true; calledHelp = true;
//initialize outputTypes
vector<string> tempOutNames;
#endif
- abort = false;
+ abort = false; calledHelp = false;
//allow user to run help
- if(option == "help") { help(); abort = true; }
+ if(option == "help") { help(); abort = true; calledHelp = true; }
else {
//valid paramters for this command
string AlignArray[] = {
- "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors"
+ "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors", "maxiter", "mindelta"
};
vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
m->mothurOutEndLine();
abort = true;
}
- else if (flowFileName == "not open" || flowFilesFileName == "not open") { abort = true; }
+ else if (flowFileName == "not open" || flowFilesFileName == "not open") { abort = true; }
+
+ if(flowFileName != "not found"){ compositeFASTAFileName = ""; }
+ else{
+ compositeFASTAFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "pn.fasta";
+ ofstream temp;
+ m->openOutputFile(compositeFASTAFileName, temp);
+ temp.close();
+ }
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
#ifdef USE_MPI
int ShhherCommand::execute(){
try {
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
int tag = 1976;
MPI_Status status;
- double begClock = clock();
- unsigned long int begTime = time(NULL);
-
- cout.setf(ios::fixed, ios::floatfield);
- cout.setf(ios::showpoint);
- cout << setprecision(2);
-
-
if(pid == 0){
for(int i=1;i<ncpus;i++){
processors = ncpus;
- cout << "\nGetting preliminary data..." << endl;
+ m->mothurOut("\nGetting preliminary data...\n");
getSingleLookUp();
getJointLookUp();
}
for(int i=0;i<numFiles;i++){
+ double begClock = clock();
+ unsigned long int begTime = time(NULL);
+
flowFileName = flowFileVector[i];
-
- cout << "\n>>>>>\tProcessing " << flowFileName << " (file " << i+1 << " of " << numFiles << ")\t<<<<<" << endl;
- cout << "Reading flowgrams..." << endl;
+
+ m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n");
+ m->mothurOut("Reading flowgrams...\n");
getFlowData();
- cout << "Identifying unique flowgrams..." << endl;
+
+ m->mothurOut("Identifying unique flowgrams...\n");
getUniques();
- cout << "Calculating distances between flowgrams..." << endl;
+ m->mothurOut("Calculating distances between flowgrams...\n");
char fileName[1024];
strcpy(fileName, flowFileName.c_str());
string namesFileName = createNamesFile();
- cout << "\nClustering flowgrams..." << endl;
+ m->mothurOut("\nClustering flowgrams...\n");
string listFileName = cluster(distFileName, namesFileName);
- // string listFileName = "PriestPot_C7.pn.list";
- // string listFileName = "test.mock_rep3.v69.pn.list";
-
+
getOTUData(listFileName);
initPyroCluster();
int numOTUsOnCPU = numOTUs / ncpus;
int numSeqsOnCPU = numSeqs / ncpus;
-
- cout << "\nDenoising flowgrams..." << endl;
- cout << "iter\tmaxDelta\tnLL\t\tcycletime" << endl;
+ m->mothurOut("\nDenoising flowgrams...\n");
+ m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
MPI_Send(&nSeqsPerOTU[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
MPI_Send(&cumNumSeqs[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
}
-
+
calcCentroidsDriver(0, numOTUsOnCPU);
for(int i=1;i<ncpus;i++){
iter++;
- cout << iter << '\t' << maxDelta << '\t' << setprecision(2) << nLL << '\t' << time(NULL) - cycTime << '\t' << setprecision(6) << (clock() - cycClock)/(double)CLOCKS_PER_SEC << endl;
+ m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
if((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
int live = 1;
}
- cout << "\nFinalizing..." << endl;
+ m->mothurOut("\nFinalizing...\n");
fill();
setOTUs();
vector<int> otuCounts(numOTUs, 0);
remove(distFileName.c_str());
remove(namesFileName.c_str());
remove(listFileName.c_str());
-
- cout << "Total time to process " << flowFileName << ":\t" << time(NULL) - begTime << '\t' << setprecision(6) << (clock() - begClock)/(double)CLOCKS_PER_SEC << endl;
+
+ m->mothurOut("Total time to process " + toString(flowFileName) + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
}
-
}
else{
int abort = 1;
- bool live = 1;
MPI_Recv(&abort, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
if(abort){ return 0; }
for(int i=0;i<numFiles;i++){
//Now into the pyrodist part
+ bool live = 1;
+
char fileName[1024];
MPI_Recv(&fileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
int total;
while(live){
-
+
MPI_Recv(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
singleTau.assign(total, 0.0000);
seqNumber.assign(total, 0);
seqIndex.assign(total, 0);
-
+
MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(¢roids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&singleTau[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&seqIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&nSeqsPerOTU[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&cumNumSeqs[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
-
+
calcCentroidsDriver(startOTU, endOTU);
MPI_Send(¢roids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD);
MPI_Send(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD);
-
MPI_Recv(¢roids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&weight[0], numOTUs, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
}
}
}
-
MPI_Barrier(MPI_COMM_WORLD);
+
return 0;
}
}
}
if(i % 100 == 0){
- cout << i << "\t" << (time(NULL) - begTime) << "\t" << (clock()-begClock)/CLOCKS_PER_SEC << endl;
+ m->mothurOut(toString(i) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
}
}
- cout << stopSeq << "\t" << (time(NULL) - begTime) << "\t" << (clock()-begClock)/CLOCKS_PER_SEC << endl;
+
+ m->mothurOut(toString(stopSeq) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.dist";
if(pid != 0){ fDistFileName += ".temp." + toString(pid); }
try {
if (abort == true) { return 0; }
- cout.setf(ios::fixed, ios::floatfield);
- cout.setf(ios::showpoint);
-
getSingleLookUp();
getJointLookUp();
for(int i=0;i<numFiles;i++){
flowFileName = flowFileVector[i];
- cout << "\n>>>>>\tProcessing " << flowFileName << " (file " << i+1 << " of " << numFiles << ")\t<<<<<" << endl;
- cout << "Reading flowgrams..." << endl;
+ m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n");
+ m->mothurOut("Reading flowgrams...\n");
getFlowData();
- cout << "Identifying unique flowgrams..." << endl;
+
+ m->mothurOut("Identifying unique flowgrams...\n");
getUniques();
- cout << "Calculating distances between flowgrams..." << endl;
+ m->mothurOut("Calculating distances between flowgrams...\n");
string distFileName = createDistFile(processors);
string namesFileName = createNamesFile();
-
- cout << "\nClustering flowgrams..." << endl;
+
+ m->mothurOut("\nClustering flowgrams...\n");
string listFileName = cluster(distFileName, namesFileName);
getOTUData(listFileName);
double begClock = clock();
unsigned long int begTime = time(NULL);
- cout << "\nDenoising flowgrams..." << endl;
- cout << "iter\tmaxDelta\tnLL\t\tcycletime" << endl;
+ m->mothurOut("\nDenoising flowgrams...\n");
+ m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
iter++;
- cout << iter << '\t' << maxDelta << '\t' << setprecision(2) << nLL << '\t' << time(NULL) - cycTime << '\t' << setprecision(6) << (clock() - cycClock)/(double)CLOCKS_PER_SEC << endl;
+ m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
+
}
- cout << "\nFinalizing..." << endl;
+ m->mothurOut("\nFinalizing...\n");
fill();
setOTUs();
remove(namesFileName.c_str());
remove(listFileName.c_str());
- cout << "Total time to process " << flowFileName << ":\t" << time(NULL) - begTime << '\t' << setprecision(6) << (clock() - begClock)/(double)CLOCKS_PER_SEC << endl;
+ m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
}
return 0;
}
m->openInputFile(flowFileName, flowFile);
string seqName;
+ seqNameVector.clear();
+ lengths.clear();
+ flowDataIntI.clear();
+ nameMap.clear();
+
int currentNumFlowCells;
for(int i=0;i<NUMBINS;i++){
for(int j=0;j<NUMBINS;j++){
- double minSum = 10000000000;
+ double minSum = 100000000;
for(int k=0;k<HOMOPS;k++){
double sum = singleLookUp[k * NUMBINS + i] + singleLookUp[k * NUMBINS + j];
/**************************************************************************************************/
-float ShhherCommand::getProbIntensity(int intIntensity){
+double ShhherCommand::getProbIntensity(int intIntensity){
try{
- double minNegLogProb = 10000000000;
+
+ double minNegLogProb = 100000000;
+
for(int i=0;i<HOMOPS;i++){//loop signal strength
float negLogProb = singleLookUp[i * NUMBINS + intIntensity];
int index = 0;
vector<short> current(numFlowCells);
- for(int j=0;j<numFlowCells;j++){ current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0)); }
+ for(int j=0;j<numFlowCells;j++){
+ current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
+ }
for(int j=0;j<numUniques;j++){
int offset = j * numFlowCells;
bool toEnd = 1;
- for(int k=0;k<numFlowCells;k++){
+ int shorterLength;
+ if(lengths[i] < uniqueLengths[j]) { shorterLength = lengths[i]; }
+ else { shorterLength = uniqueLengths[j]; }
+
+ for(int k=0;k<shorterLength;k++){
if(current[k] != uniqueFlowgrams[offset + k]){
toEnd = 0;
break;
mapSeqToUnique[i] = j;
uniqueCount[j]++;
index = j;
+ if(lengths[i] > uniqueLengths[j]) { uniqueLengths[j] = lengths[i]; }
break;
}
index++;
uniqueFlowDataIntI.resize(numFlowCells * numUniques);
uniqueLengths.resize(numUniques);
- flowDataPrI.assign(numSeqs * numFlowCells, 0);
+ flowDataPrI.resize(numSeqs * numFlowCells, 0);
for(int i=0;i<flowDataPrI.size();i++) { flowDataPrI[i] = getProbIntensity(flowDataIntI[i]); }
}
catch(exception& e) {
float flowDistance = calcPairwiseDist(mapUniqueToSeq[i], mapUniqueToSeq[j]);
if(flowDistance < 1e-6){
- outStream << seqNameVector[mapUniqueToSeq[i]] << '\t' << seqNameVector[mapUniqueToSeq[j]] << '\t' << 0.000000 << endl;
+ outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
}
else if(flowDistance <= cutoff){
- outStream << seqNameVector[mapUniqueToSeq[i]] << '\t' << seqNameVector[mapUniqueToSeq[j]] << '\t' << flowDistance << endl;
+ outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
}
}
if(i % 100 == 0){
m->mothurOutEndLine();
- cout << "Total time: " << (time(NULL) - begTime) << "\t" << (clock() - begClock)/CLOCKS_PER_SEC << endl;;
+ m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
+
return fDistFileName;
}
string ShhherCommand::cluster(string distFileName, string namesFileName){
try {
- SparseMatrix* matrix;
- ListVector* list;
- RAbundVector* rabund;
globaldata->setNameFile(namesFileName);
globaldata->setColumnFile(distFileName);
clusterNameMap->readMap();
read->read(clusterNameMap);
- list = read->getListVector();
- matrix = read->getMatrix();
+ ListVector* list = read->getListVector();
+ SparseMatrix* matrix = read->getMatrix();
delete read;
delete clusterNameMap;
- rabund = new RAbundVector(list->getRAbundVector());
+ RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest");
string tag = cluster->getTag();
otuData.assign(numSeqs, 0);
cumNumSeqs.assign(numOTUs, 0);
nSeqsPerOTU.assign(numOTUs, 0);
- aaP.resize(numOTUs);
+ aaP.clear();aaP.resize(numOTUs);
+
+ seqNumber.clear();
+ aaI.clear();
+ seqIndex.clear();
string singleOTU = "";
for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
aaP[i].push_back(0);
}
+
+
}
for(int i=1;i<numOTUs;i++){
seqIndex = seqNumber;
listFile.close();
+
}
catch(exception& e) {
m->errorOut(e, "ShhherCommand", "getOTUData");
try{
+
for(int i=start;i<finish;i++){
double count = 0;
for(int j=0;j<nSeqsPerOTU[i];j++){
count += singleTau[seqNumber[cumNumSeqs[i] + j]];
}
-
+
if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
vector<double> adF(nSeqsPerOTU[i]);
vector<int> anL(nSeqsPerOTU[i]);
try{
vector<double> newTau(numOTUs,0);
vector<double> norms(numSeqs, 0);
- otuIndex.resize(0);
- seqIndex.resize(0);
- singleTau.resize(0);
+ otuIndex.clear();
+ seqIndex.clear();
+ singleTau.clear();
void ShhherCommand::writeSequences(vector<int> otuCounts){
try {
-
string bases = "TACG";
string fastaFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.fasta";
}
}
fastaFile.close();
+
+ if(compositeFASTAFileName != ""){
+ m->appendFiles(fastaFileName, compositeFASTAFileName);
+ }
}
catch(exception& e) {
m->errorOut(e, "ShhherCommand", "writeSequences");