#include <cfloat>
//**********************************************************************************************************************
-
-#define NUMBINS 1000
-#define HOMOPS 10
-#define MIN_COUNT 0.1
-#define MIN_WEIGHT 0.1
-#define MIN_TAU 0.0001
-#define MIN_ITER 10
-
-//**********************************************************************************************************************
-
-vector<string> ShhherCommand::getValidParameters(){
+vector<string> ShhherCommand::setParameters(){
try {
- string Array[] = {
- "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors"
- };
+ CommandParameter pflow("flow", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pflow);
+ CommandParameter pfile("file", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pfile);
+ CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plookup);
+ CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "",false,false); parameters.push_back(pcutoff);
+ CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
+ CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "",false,false); parameters.push_back(pmaxiter);
+ CommandParameter psigma("sigma", "Number", "", "60", "", "", "",false,false); parameters.push_back(psigma);
+ CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "",false,false); parameters.push_back(pmindelta);
+ CommandParameter porder("order", "String", "", "", "", "", "",false,false); parameters.push_back(porder);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
- vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+ vector<string> myArray;
+ for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
return myArray;
}
catch(exception& e) {
- m->errorOut(e, "ShhherCommand", "getValidParameters");
+ m->errorOut(e, "ShhherCommand", "setParameters");
exit(1);
}
}
-
//**********************************************************************************************************************
-
-ShhherCommand::ShhherCommand(){
+string ShhherCommand::getHelpString(){
try {
- abort = true; calledHelp = true;
-
- //initialize outputTypes
- vector<string> tempOutNames;
- outputTypes["pn.dist"] = tempOutNames;
-
+ string helpString = "";
+ helpString += "The shhh.seqs command reads a file containing flowgrams and creates a file of corrected sequences.\n";
+ return helpString;
}
catch(exception& e) {
- m->errorOut(e, "ShhherCommand", "ShhherCommand");
+ m->errorOut(e, "ShhherCommand", "getHelpString");
exit(1);
}
}
-
//**********************************************************************************************************************
-vector<string> ShhherCommand::getRequiredParameters(){
+ShhherCommand::ShhherCommand(){
try {
- string Array[] = {"flow"};
- vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
- return myArray;
- }
- catch(exception& e) {
- m->errorOut(e, "ShhherCommand", "getRequiredParameters");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
+ abort = true; calledHelp = true;
+ setParameters();
+
+ //initialize outputTypes
+// vector<string> tempOutNames;
+// outputTypes["pn.dist"] = tempOutNames;
-vector<string> ShhherCommand::getRequiredFiles(){
- try {
- vector<string> myArray;
- return myArray;
}
catch(exception& e) {
- m->errorOut(e, "ShhherCommand", "getRequiredFiles");
+ m->errorOut(e, "ShhherCommand", "ShhherCommand");
exit(1);
}
}
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
else {
-
- //valid paramters for this command
- string AlignArray[] = {
- "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors"
- };
-
- vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+ vector<string> myArray = setParameters();
OptionParser parser(option);
map<string,string> parameters = parser.getParameters();
//initialize outputTypes
vector<string> tempOutNames;
- outputTypes["pn.dist"] = tempOutNames;
+// outputTypes["pn.dist"] = tempOutNames;
// outputTypes["fasta"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
}
else if (flowFileName == "not open" || flowFilesFileName == "not open") { abort = true; }
- if(flowFileName != "not found"){ compositeFASTAFileName = ""; }
+ if(flowFileName != "not found"){
+ compositeFASTAFileName = "";
+ compositeNamesFileName = "";
+ }
else{
- compositeFASTAFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "pn.fasta";
ofstream temp;
+
+ //flow.files = 9 character offset
+ compositeFASTAFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "shhh.fasta";
m->openOutputFile(compositeFASTAFileName, temp);
temp.close();
+
+ compositeNamesFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "shhh.names";
+ m->openOutputFile(compositeNamesFileName, temp);
+ temp.close();
}
//if the user changes the output directory command factory will send this info to us in the output parameter
// ...at some point should added some additional type checking...
string temp;
temp = validParameter.validFile(parameters, "lookup", true);
- if (temp == "not found") { lookupFileName = "LookUp_Titanium.pat"; }
- else if(temp == "not open") { abort = true; }
- else { lookupFileName = temp; }
+ if (temp == "not found") {
+ lookupFileName = "LookUp_Titanium.pat";
+
+ int ableToOpen;
+ ifstream in;
+ ableToOpen = m->openInputFile(lookupFileName, in, "noerror");
+ in.close();
+
+ //if you can't open it, try input location
+ if (ableToOpen == 1) {
+ if (inputDir != "") { //default path is set
+ string tryPath = inputDir + lookupFileName;
+ m->mothurOut("Unable to open " + lookupFileName + ". Trying input directory " + tryPath); m->mothurOutEndLine();
+ ifstream in2;
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+ in2.close();
+ lookupFileName = tryPath;
+ }
+ }
+
+ //if you can't open it, try default location
+ if (ableToOpen == 1) {
+ if (m->getDefaultPath() != "") { //default path is set
+ string tryPath = m->getDefaultPath() + m->getSimpleName(lookupFileName);
+ m->mothurOut("Unable to open " + lookupFileName + ". Trying default " + tryPath); m->mothurOutEndLine();
+ ifstream in2;
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+ in2.close();
+ lookupFileName = tryPath;
+ }
+ }
+
+ //if you can't open it its not in current working directory or inputDir, try mothur excutable location
+ if (ableToOpen == 1) {
+ string exepath = m->argv;
+ string tempPath = exepath;
+ for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
+ exepath = exepath.substr(0, (tempPath.find_last_of('m')));
+
+ string tryPath = m->getFullPathName(exepath) + m->getSimpleName(lookupFileName);
+ m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
+ ifstream in2;
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+ in2.close();
+ lookupFileName = tryPath;
+ }
+
+ if (ableToOpen == 1) { m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true; }
+ }
+ else if(temp == "not open") {
+
+ lookupFileName = validParameter.validFile(parameters, "lookup", false);
+
+ //if you can't open it its not inputDir, try mothur excutable location
+ string exepath = m->argv;
+ string tempPath = exepath;
+ for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
+ exepath = exepath.substr(0, (tempPath.find_last_of('m')));
+
+ string tryPath = m->getFullPathName(exepath) + lookupFileName;
+ m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
+ ifstream in2;
+ int ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+ in2.close();
+ lookupFileName = tryPath;
+
+ if (ableToOpen == 1) { m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true; }
+ }else { lookupFileName = temp; }
- temp = validParameter.validFile(parameters, "processors", false);if (temp == "not found"){ temp = "1"; }
- convert(temp, processors);
+ temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
+ m->setProcessors(temp);
+ convert(temp, processors);
temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found"){ temp = "0.01"; }
convert(temp, cutoff);
temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found") { temp = "60"; }
convert(temp, sigma);
- globaldata = GlobalData::getInstance();
+ flowOrder = validParameter.validFile(parameters, "order", false);
+ if (flowOrder == "not found"){ flowOrder = "TACG"; }
+ else if(flowOrder.length() != 4){
+ m->mothurOut("The value of the order option must be four bases long\n");
+ }
+
}
#ifdef USE_MPI
exit(1);
}
}
-
-//**********************************************************************************************************************
-
-ShhherCommand::~ShhherCommand(){}
-
-//**********************************************************************************************************************
-
-void ShhherCommand::help(){
- try {
- m->mothurOut("The shhher command reads a file containing flowgrams and creates a file of corrected sequences.\n");
- }
- catch(exception& e) {
- m->errorOut(e, "ShhherCommand", "help");
- exit(1);
- }
-}
-
//**********************************************************************************************************************
#ifdef USE_MPI
int ShhherCommand::execute(){
int tag = 1976;
MPI_Status status;
- double begClock = clock();
- unsigned long int begTime = time(NULL);
-
- cout.setf(ios::fixed, ios::floatfield);
- cout.setf(ios::showpoint);
- cout << setprecision(2);
-
-
if(pid == 0){
for(int i=1;i<ncpus;i++){
processors = ncpus;
- cout << "\nGetting preliminary data..." << endl;
- getSingleLookUp();
- getJointLookUp();
+ m->mothurOut("\nGetting preliminary data...\n");
+ getSingleLookUp(); if (m->control_pressed) { return 0; }
+ getJointLookUp(); if (m->control_pressed) { return 0; }
vector<string> flowFileVector;
if(flowFilesFileName != "not found"){
ifstream flowFilesFile;
m->openInputFile(flowFilesFileName, flowFilesFile);
while(flowFilesFile){
- flowFilesFile >> fName;
+ fName = m->getline(flowFilesFile);
flowFileVector.push_back(fName);
m->gobble(flowFilesFile);
}
}
for(int i=0;i<numFiles;i++){
+
+ if (m->control_pressed) { break; }
+
+ double begClock = clock();
+ unsigned long long begTime = time(NULL);
+
flowFileName = flowFileVector[i];
-
- cout << "\n>>>>>\tProcessing " << flowFileName << " (file " << i+1 << " of " << numFiles << ")\t<<<<<" << endl;
- cout << "Reading flowgrams..." << endl;
+
+ m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n");
+ m->mothurOut("Reading flowgrams...\n");
getFlowData();
- cout << "Identifying unique flowgrams..." << endl;
+
+ if (m->control_pressed) { break; }
+
+ m->mothurOut("Identifying unique flowgrams...\n");
getUniques();
+
+ if (m->control_pressed) { break; }
- cout << "Calculating distances between flowgrams..." << endl;
+ m->mothurOut("Calculating distances between flowgrams...\n");
char fileName[1024];
strcpy(fileName, flowFileName.c_str());
string distFileName = flowDistMPI(0, int(sqrt(1.0/float(ncpus)) * numUniques));
+ if (m->control_pressed) { break; }
+
int done;
for(int i=1;i<ncpus;i++){
MPI_Recv(&done, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
m->appendFiles((distFileName + ".temp." + toString(i)), distFileName);
- remove((distFileName + ".temp." + toString(i)).c_str());
+ m->mothurRemove((distFileName + ".temp." + toString(i)));
}
string namesFileName = createNamesFile();
- cout << "\nClustering flowgrams..." << endl;
+ if (m->control_pressed) { break; }
+
+ m->mothurOut("\nClustering flowgrams...\n");
string listFileName = cluster(distFileName, namesFileName);
-
+
+ if (m->control_pressed) { break; }
+
getOTUData(listFileName);
+
+ m->mothurRemove(distFileName);
+ m->mothurRemove(namesFileName);
+ m->mothurRemove(listFileName);
+
+ if (m->control_pressed) { break; }
+
initPyroCluster();
+ if (m->control_pressed) { break; }
+
for(int i=1;i<ncpus;i++){
MPI_Send(&numOTUs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
MPI_Send(&singleLookUp[0], singleLookUp.size(), MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
MPI_Send(&sigma, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
}
+ if (m->control_pressed) { break; }
double maxDelta = 0;
int iter = 0;
int numOTUsOnCPU = numOTUs / ncpus;
int numSeqsOnCPU = numSeqs / ncpus;
-
- cout << "\nDenoising flowgrams..." << endl;
- cout << "iter\tmaxDelta\tnLL\t\tcycletime" << endl;
+ m->mothurOut("\nDenoising flowgrams...\n");
+ m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
-
+
double cycClock = clock();
- unsigned long int cycTime = time(NULL);
+ unsigned long long cycTime = time(NULL);
fill();
+
+ if (m->control_pressed) { break; }
int total = singleTau.size();
for(int i=1;i<ncpus;i++){
MPI_Send(&nSeqsPerOTU[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
MPI_Send(&cumNumSeqs[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
}
-
+
calcCentroidsDriver(0, numOTUsOnCPU);
for(int i=1;i<ncpus;i++){
}
}
- maxDelta = getNewWeights();
- double nLL = getLikelihood();
- checkCentroids();
+ maxDelta = getNewWeights(); if (m->control_pressed) { break; }
+ double nLL = getLikelihood(); if (m->control_pressed) { break; }
+ checkCentroids(); if (m->control_pressed) { break; }
for(int i=1;i<ncpus;i++){
MPI_Send(¢roids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
iter++;
- cout << iter << '\t' << maxDelta << '\t' << setprecision(2) << nLL << '\t' << time(NULL) - cycTime << '\t' << setprecision(6) << (clock() - cycClock)/(double)CLOCKS_PER_SEC << endl;
+ m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
if((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
int live = 1;
}
- cout << "\nFinalizing..." << endl;
+ if (m->control_pressed) { break; }
+
+ m->mothurOut("\nFinalizing...\n");
fill();
+
+ if (m->control_pressed) { break; }
+
setOTUs();
+
vector<int> otuCounts(numOTUs, 0);
for(int i=0;i<numSeqs;i++) { otuCounts[otuData[i]]++; }
calcCentroidsDriver(0, numOTUs);
- writeQualities(otuCounts);
- writeSequences(otuCounts);
- writeNames(otuCounts);
- writeClusters(otuCounts);
- writeGroups();
- remove(distFileName.c_str());
- remove(namesFileName.c_str());
- remove(listFileName.c_str());
+ if (m->control_pressed) { break; }
- cout << "Total time to process " << flowFileName << ":\t" << time(NULL) - begTime << '\t' << setprecision(6) << (clock() - begClock)/(double)CLOCKS_PER_SEC << endl;
+ writeQualities(otuCounts); if (m->control_pressed) { break; }
+ writeSequences(otuCounts); if (m->control_pressed) { break; }
+ writeNames(otuCounts); if (m->control_pressed) { break; }
+ writeClusters(otuCounts); if (m->control_pressed) { break; }
+ writeGroups(); if (m->control_pressed) { break; }
+
+
+ m->mothurOut("Total time to process " + toString(flowFileName) + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
}
-
}
else{
int abort = 1;
- bool live = 1;
MPI_Recv(&abort, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
if(abort){ return 0; }
MPI_Recv(&numFiles, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
for(int i=0;i<numFiles;i++){
+
+ if (m->control_pressed) { break; }
+
//Now into the pyrodist part
+ bool live = 1;
+
char fileName[1024];
MPI_Recv(&fileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
int flowDistEnd = int(sqrt(float(pid+1)/float(ncpus)) * numUniques);
string distanceStringChild = flowDistMPI(flowDistStart, flowDistEnd);
-
+
+ if (m->control_pressed) { break; }
+
int done = 1;
MPI_Send(&done, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
int total;
while(live){
-
+
+ if (m->control_pressed) { break; }
+
MPI_Recv(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
singleTau.assign(total, 0.0000);
seqNumber.assign(total, 0);
seqIndex.assign(total, 0);
-
+
MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(¢roids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&singleTau[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&seqIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&nSeqsPerOTU[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&cumNumSeqs[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
-
+
calcCentroidsDriver(startOTU, endOTU);
MPI_Send(¢roids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD);
MPI_Send(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD);
-
MPI_Recv(¢roids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&weight[0], numOTUs, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&live, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
}
}
- }
-
+ }
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
MPI_Barrier(MPI_COMM_WORLD);
+
+
+ if(compositeFASTAFileName != ""){
+ outputNames.push_back(compositeFASTAFileName);
+ outputNames.push_back(compositeNamesFileName);
+ }
+
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+
return 0;
}
double begClock = clock();
for(int i=startSeq;i<stopSeq;i++){
+
+ if (m->control_pressed) { break; }
+
for(int j=0;j<i;j++){
float flowDistance = calcPairwiseDist(mapUniqueToSeq[i], mapUniqueToSeq[j]);
}
}
if(i % 100 == 0){
- cout << i << "\t" << (time(NULL) - begTime) << "\t" << (clock()-begClock)/CLOCKS_PER_SEC << endl;
+ m->mothurOut(toString(i) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
}
}
- cout << stopSeq << "\t" << (time(NULL) - begTime) << "\t" << (clock()-begClock)/CLOCKS_PER_SEC << endl;
- string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.dist";
+ string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
if(pid != 0){ fDistFileName += ".temp." + toString(pid); }
+
+ if (m->control_pressed) { return fDistFileName; }
+
+ m->mothurOut(toString(stopSeq) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
ofstream distFile(fDistFileName.c_str());
distFile << outStream.str();
try {
if (abort == true) { return 0; }
- cout.setf(ios::fixed, ios::floatfield);
- cout.setf(ios::showpoint);
-
- getSingleLookUp();
- getJointLookUp();
+ getSingleLookUp(); if (m->control_pressed) { return 0; }
+ getJointLookUp(); if (m->control_pressed) { return 0; }
+
vector<string> flowFileVector;
if(flowFilesFileName != "not found"){
string fName;
ifstream flowFilesFile;
m->openInputFile(flowFilesFileName, flowFilesFile);
while(flowFilesFile){
- flowFilesFile >> fName;
+ fName = m->getline(flowFilesFile);
flowFileVector.push_back(fName);
m->gobble(flowFilesFile);
}
for(int i=0;i<numFiles;i++){
+
+ if (m->control_pressed) { break; }
+
flowFileName = flowFileVector[i];
- cout << "\n>>>>>\tProcessing " << flowFileName << " (file " << i+1 << " of " << numFiles << ")\t<<<<<" << endl;
- cout << "Reading flowgrams..." << endl;
+ m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n");
+ m->mothurOut("Reading flowgrams...\n");
getFlowData();
- cout << "Identifying unique flowgrams..." << endl;
+
+ if (m->control_pressed) { break; }
+
+ m->mothurOut("Identifying unique flowgrams...\n");
getUniques();
+ if (m->control_pressed) { break; }
- cout << "Calculating distances between flowgrams..." << endl;
+ m->mothurOut("Calculating distances between flowgrams...\n");
string distFileName = createDistFile(processors);
string namesFileName = createNamesFile();
- cout << "\nClustering flowgrams..." << endl;
+ if (m->control_pressed) { break; }
+
+ m->mothurOut("\nClustering flowgrams...\n");
string listFileName = cluster(distFileName, namesFileName);
+
+ if (m->control_pressed) { break; }
+
getOTUData(listFileName);
+ if (m->control_pressed) { break; }
+
+ m->mothurRemove(distFileName);
+ m->mothurRemove(namesFileName);
+ m->mothurRemove(listFileName);
+
initPyroCluster();
+ if (m->control_pressed) { break; }
+
double maxDelta = 0;
int iter = 0;
double begClock = clock();
- unsigned long int begTime = time(NULL);
+ unsigned long long begTime = time(NULL);
- cout << "\nDenoising flowgrams..." << endl;
- cout << "iter\tmaxDelta\tnLL\t\tcycletime" << endl;
+ m->mothurOut("\nDenoising flowgrams...\n");
+ m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
+ if (m->control_pressed) { break; }
+
double cycClock = clock();
- unsigned long int cycTime = time(NULL);
+ unsigned long long cycTime = time(NULL);
fill();
+ if (m->control_pressed) { break; }
+
calcCentroids();
- maxDelta = getNewWeights();
- double nLL = getLikelihood();
+ if (m->control_pressed) { break; }
+
+ maxDelta = getNewWeights(); if (m->control_pressed) { break; }
+ double nLL = getLikelihood(); if (m->control_pressed) { break; }
checkCentroids();
+ if (m->control_pressed) { break; }
+
calcNewDistances();
-
+
+ if (m->control_pressed) { break; }
+
iter++;
- cout << iter << '\t' << maxDelta << '\t' << setprecision(2) << nLL << '\t' << time(NULL) - cycTime << '\t' << setprecision(6) << (clock() - cycClock)/(double)CLOCKS_PER_SEC << endl;
+ m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
+
}
- cout << "\nFinalizing..." << endl;
+ if (m->control_pressed) { break; }
+
+ m->mothurOut("\nFinalizing...\n");
fill();
+
+ if (m->control_pressed) { break; }
+
setOTUs();
+ if (m->control_pressed) { break; }
+
vector<int> otuCounts(numOTUs, 0);
for(int i=0;i<numSeqs;i++) { otuCounts[otuData[i]]++; }
- calcCentroidsDriver(0, numOTUs);
- writeQualities(otuCounts);
- writeSequences(otuCounts);
- writeNames(otuCounts);
- writeClusters(otuCounts);
- writeGroups();
+ calcCentroidsDriver(0, numOTUs); if (m->control_pressed) { break; }
+ writeQualities(otuCounts); if (m->control_pressed) { break; }
+ writeSequences(otuCounts); if (m->control_pressed) { break; }
+ writeNames(otuCounts); if (m->control_pressed) { break; }
+ writeClusters(otuCounts); if (m->control_pressed) { break; }
+ writeGroups(); if (m->control_pressed) { break; }
- remove(distFileName.c_str());
- remove(namesFileName.c_str());
- remove(listFileName.c_str());
-
- cout << "Total time to process " << flowFileName << ":\t" << time(NULL) - begTime << '\t' << setprecision(6) << (clock() - begClock)/(double)CLOCKS_PER_SEC << endl;
+ m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
+ }
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+
+ if(compositeFASTAFileName != ""){
+ outputNames.push_back(compositeFASTAFileName);
+ outputNames.push_back(compositeNamesFileName);
}
+
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+
return 0;
}
catch(exception& e) {
m->openInputFile(flowFileName, flowFile);
string seqName;
+ seqNameVector.clear();
+ lengths.clear();
+ flowDataIntI.clear();
+ nameMap.clear();
+
int currentNumFlowCells;
flowFile >> numFlowCells;
int index = 0;//pcluster
while(!flowFile.eof()){
+
+ if (m->control_pressed) { break; }
+
flowFile >> seqName >> currentNumFlowCells;
lengths.push_back(currentNumFlowCells);
numSeqs = seqNameVector.size();
for(int i=0;i<numSeqs;i++){
+
+ if (m->control_pressed) { break; }
+
int iNumFlowCells = i * numFlowCells;
for(int j=lengths[i];j<numFlowCells;j++){
flowDataIntI[iNumFlowCells + j] = 0;
m->openInputFile(lookupFileName, lookUpFile);
for(int i=0;i<HOMOPS;i++){
+
+ if (m->control_pressed) { break; }
+
float logFracFreq;
lookUpFile >> logFracFreq;
jointLookUp.resize(NUMBINS * NUMBINS, 0);
for(int i=0;i<NUMBINS;i++){
+
+ if (m->control_pressed) { break; }
+
for(int j=0;j<NUMBINS;j++){
double minSum = 100000000;
for(int i=0;i<HOMOPS;i++){//loop signal strength
+
+ if (m->control_pressed) { break; }
+
float negLogProb = singleLookUp[i * NUMBINS + intIntensity];
if(negLogProb < minNegLogProb) { minNegLogProb = negLogProb; }
}
vector<short> uniqueFlowDataIntI(numFlowCells * numSeqs, -1);
for(int i=0;i<numSeqs;i++){
+
+ if (m->control_pressed) { break; }
+
int index = 0;
vector<short> current(numFlowCells);
- for(int j=0;j<numFlowCells;j++){ current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0)); }
+ for(int j=0;j<numFlowCells;j++){
+ current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
+ }
for(int j=0;j<numUniques;j++){
int offset = j * numFlowCells;
bool toEnd = 1;
- for(int k=0;k<numFlowCells;k++){
+ int shorterLength;
+ if(lengths[i] < uniqueLengths[j]) { shorterLength = lengths[i]; }
+ else { shorterLength = uniqueLengths[j]; }
+
+ for(int k=0;k<shorterLength;k++){
if(current[k] != uniqueFlowgrams[offset + k]){
toEnd = 0;
break;
mapSeqToUnique[i] = j;
uniqueCount[j]++;
index = j;
+ if(lengths[i] > uniqueLengths[j]) { uniqueLengths[j] = lengths[i]; }
break;
}
index++;
uniqueFlowDataIntI.resize(numFlowCells * numUniques);
uniqueLengths.resize(numUniques);
- flowDataPrI.assign(numSeqs * numFlowCells, 0);
- for(int i=0;i<flowDataPrI.size();i++) { flowDataPrI[i] = getProbIntensity(flowDataIntI[i]); }
+ flowDataPrI.resize(numSeqs * numFlowCells, 0);
+ for(int i=0;i<flowDataPrI.size();i++) { if (m->control_pressed) { break; } flowDataPrI[i] = getProbIntensity(flowDataIntI[i]); }
}
catch(exception& e) {
m->errorOut(e, "ShhherCommand", "getUniques");
float dist = 0;
for(int i=0;i<minLength;i++){
+
+ if (m->control_pressed) { break; }
+
int flowAIntI = flowDataIntI[ANumFlowCells + i];
float flowAPrI = flowDataPrI[ANumFlowCells + i];
double begClock = clock();
for(int i=startSeq;i<stopSeq;i++){
+
+ if (m->control_pressed) { break; }
+
for(int j=0;j<i;j++){
float flowDistance = calcPairwiseDist(mapUniqueToSeq[i], mapUniqueToSeq[j]);
m->mothurOutEndLine();
}
}
- m->mothurOut(toString(stopSeq-1) + "\t" + toString(time(NULL) - begTime));
- m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
- m->mothurOutEndLine();
ofstream distFile(distFileName.c_str());
distFile << outStream.str();
distFile.close();
+
+ if (m->control_pressed) {}
+ else {
+ m->mothurOut(toString(stopSeq-1) + "\t" + toString(time(NULL) - begTime));
+ m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
+ m->mothurOutEndLine();
+ }
}
catch(exception& e) {
m->errorOut(e, "ShhherCommand", "flowDistParentFork");
string ShhherCommand::createDistFile(int processors){
try{
- string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.dist";
+//////////////////////// until I figure out the shared memory issue //////////////////////
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#else
+ processors=1;
+#endif
+//////////////////////// until I figure out the shared memory issue //////////////////////
+
+ string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
- unsigned long int begTime = time(NULL);
+ unsigned long long begTime = time(NULL);
double begClock = clock();
-
- vector<int> start;
- vector<int> end;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ if (numSeqs < processors){ processors = 1; }
+
if(processors == 1) { flowDistParentFork(fDistFileName, 0, numUniques); }
+
else{ //you have multiple processors
- if (numSeqs < processors){ processors = 1; }
-
vector<int> start(processors, 0);
vector<int> end(processors, 0);
+ int process = 1;
+ vector<int> processIDs;
+
for (int i = 0; i < processors; i++) {
start[i] = int(sqrt(float(i)/float(processors)) * numUniques);
end[i] = int(sqrt(float(i+1)/float(processors)) * numUniques);
}
- int process = 1;
- vector<int> processIDs;
-
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+
//loop through and create all the processes you want
while (process != processors) {
int pid = fork();
int temp = processIDs[i];
wait(&temp);
}
+#else
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+ //Windows version shared memory, so be careful when passing variables through the flowDistParentForkData struct.
+ //Above fork() will clone, so memory is separate, but that's not the case with windows,
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ vector<flowDistParentForkData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+
+ //Create processor worker threads.
+ for(int i = 0; i < processors-1; i++){
+ // Allocate memory for thread data.
+ string extension = extension = toString(i) + ".temp";
+
+ flowDistParentForkData* tempdist = new flowDistParentForkData((fDistFileName + extension), mapUniqueToSeq, mapSeqToUnique, lengths, flowDataIntI, flowDataPrI, jointLookUp, m, start[i+1], end[i+1], numFlowCells, cutoff, i);
+ pDataArray.push_back(tempdist);
+ processIDs.push_back(i);
+
+ //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
+ //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+ hThreadArray[i] = CreateThread(NULL, 0, MyflowDistParentForkThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
+ }
+
+ //parent does its part
+ flowDistParentFork(fDistFileName, start[0], end[0]);
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
+
+#endif
//append and remove temp files
for (int i=0;i<processIDs.size();i++) {
m->appendFiles((fDistFileName + toString(processIDs[i]) + ".temp"), fDistFileName);
- remove((fDistFileName + toString(processIDs[i]) + ".temp").c_str());
+ m->mothurRemove((fDistFileName + toString(processIDs[i]) + ".temp"));
}
}
-#else
- flowDistParentFork(fDistFileName, 0, numUniques);
-#endif
-
m->mothurOutEndLine();
+ m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
- cout << "Total time: " << (time(NULL) - begTime) << "\t" << (clock() - begClock)/CLOCKS_PER_SEC << endl;;
-
return fDistFileName;
}
catch(exception& e) {
duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
}
- string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.names";
+ string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
ofstream nameFile;
m->openOutputFile(nameFileName, nameFile);
for(int i=0;i<numUniques;i++){
+
+ if (m->control_pressed) { break; }
+
// nameFile << seqNameVector[mapUniqueToSeq[i]] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
}
string ShhherCommand::cluster(string distFileName, string namesFileName){
try {
- SparseMatrix* matrix;
- ListVector* list;
- RAbundVector* rabund;
-
- globaldata->setNameFile(namesFileName);
- globaldata->setColumnFile(distFileName);
- globaldata->setFormat("column");
-
ReadMatrix* read = new ReadColumnMatrix(distFileName);
read->setCutoff(cutoff);
clusterNameMap->readMap();
read->read(clusterNameMap);
- list = read->getListVector();
- matrix = read->getMatrix();
+ ListVector* list = read->getListVector();
+ SparseMatrix* matrix = read->getMatrix();
delete read;
delete clusterNameMap;
- rabund = new RAbundVector(list->getRAbundVector());
+ RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest");
string tag = cluster->getTag();
double clusterCutoff = cutoff;
while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
+
+ if (m->control_pressed) { break; }
+
cluster->update(clusterCutoff);
}
list->setLabel(toString(cutoff));
- string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.list";
+ string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
ofstream listFile;
m->openOutputFile(listFileName, listFile);
list->print(listFile);
otuData.assign(numSeqs, 0);
cumNumSeqs.assign(numOTUs, 0);
nSeqsPerOTU.assign(numOTUs, 0);
- aaP.resize(numOTUs);
+ aaP.clear();aaP.resize(numOTUs);
+
+ seqNumber.clear();
+ aaI.clear();
+ seqIndex.clear();
string singleOTU = "";
for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) { break; }
listFile >> singleOTU;
for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
aaP[i].push_back(0);
}
+
+
}
for(int i=1;i<numOTUs;i++){
seqIndex = seqNumber;
listFile.close();
+
}
catch(exception& e) {
m->errorOut(e, "ShhherCommand", "getOTUData");
void ShhherCommand::initPyroCluster(){
try{
+
+ if (numOTUs < processors) { processors = 1; }
+
dist.assign(numSeqs * numOTUs, 0);
change.assign(numOTUs, 1);
centroids.assign(numOTUs, -1);
try {
int index = 0;
for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) { break; }
+
cumNumSeqs[i] = index;
for(int j=0;j<nSeqsPerOTU[i];j++){
seqNumber[index] = aaP[i][j];
try{
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-
+
if(processors == 1) {
calcCentroidsDriver(0, numOTUs);
}
for(int i=start;i<finish;i++){
+ if (m->control_pressed) { break; }
+
double count = 0;
int position = 0;
int minFlowGram = 100000000;
for(int j=0;j<nSeqsPerOTU[i];j++){
count += singleTau[seqNumber[cumNumSeqs[i] + j]];
}
-
+
if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
vector<double> adF(nSeqsPerOTU[i]);
vector<int> anL(nSeqsPerOTU[i]);
int flowBValue = flow * numFlowCells;
double dist = 0;
-
+
for(int i=0;i<length;i++){
dist += singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
flowAValue++;
for(int i=0;i<numOTUs;i++){
+ if (m->control_pressed) { break; }
+
double difference = weight[i];
weight[i] = 0;
string hold;
for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) { break; }
+
for(int j=0;j<nSeqsPerOTU[i];j++){
int index = cumNumSeqs[i] + j;
int nI = seqIndex[index];
}
for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) { break; }
+
if(unique[i] == 1){
for(int j=i+1;j<numOTUs;j++){
if(unique[j] == 1){
try{
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-
+
if(processors == 1) {
calcNewDistancesParent(0, numSeqs);
}
exit(0);
}
}
-
+
//parent does its part
calcNewDistancesParent(nSeqsBreaks[0], nSeqsBreaks[1]);
int total = seqIndex.size();
try{
vector<double> newTau(numOTUs,0);
vector<double> norms(numSeqs, 0);
- otuIndex.resize(0);
- seqIndex.resize(0);
- singleTau.resize(0);
-
-
+ otuIndex.clear();
+ seqIndex.clear();
+ singleTau.clear();
for(int i=startSeq;i<stopSeq;i++){
+
+ if (m->control_pressed) { break; }
+
double offset = 1e8;
int indexOffset = i * numOTUs;
child_singleTau.resize(0);
for(int i=startSeq;i<stopSeq;i++){
+
+ if (m->control_pressed) { break; }
+
double offset = 1e8;
int indexOffset = i * numOTUs;
vector<double> newTau(numOTUs,0);
vector<double> norms(numSeqs, 0);
nSeqsPerOTU.assign(numOTUs, 0);
-
+
for(int i=startSeq;i<stopSeq;i++){
- int indexOffset = i * numOTUs;
+ if (m->control_pressed) { break; }
+
+ int indexOffset = i * numOTUs;
+
double offset = 1e8;
for(int j=0;j<numOTUs;j++){
+
if(weight[j] > MIN_WEIGHT && change[j] == 1){
dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
}
-
+
if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
offset = dist[indexOffset + j];
}
}
-
+
for(int j=0;j<numOTUs;j++){
if(weight[j] > MIN_WEIGHT){
newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
newTau[j] = 0.0;
}
}
-
+
for(int j=0;j<numOTUs;j++){
newTau[j] /= norms[i];
}
-
+
for(int j=0;j<numOTUs;j++){
if(newTau[j] > MIN_TAU){
nSeqsPerOTU[j]++;
}
}
+
}
+
}
catch(exception& e) {
m->errorOut(e, "ShhherCommand", "calcNewDistancesParent");
vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) { break; }
+
for(int j=0;j<nSeqsPerOTU[i];j++){
int index = cumNumSeqs[i] + j;
double tauValue = singleTau[seqNumber[index]];
void ShhherCommand::writeQualities(vector<int> otuCounts){
try {
- string qualityFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.qual";
+ string qualityFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.qual";
ofstream qualityFile;
m->openOutputFile(qualityFileName, qualityFile);
for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) { break; }
+
int index = 0;
int base = 0;
}
}
qualityFile.close();
-
+ outputNames.push_back(qualityFileName);
+
}
catch(exception& e) {
m->errorOut(e, "ShhherCommand", "writeQualities");
void ShhherCommand::writeSequences(vector<int> otuCounts){
try {
- string bases = "TACG";
- string fastaFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.fasta";
+ string fastaFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.fasta";
ofstream fastaFile;
m->openOutputFile(fastaFileName, fastaFile);
vector<string> names(numOTUs, "");
for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) { break; }
+
int index = centroids[i];
if(otuCounts[i] > 0){
fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
- for(int j=8;j<numFlowCells;j++){
+ string newSeq = "";
+
+ for(int j=0;j<numFlowCells;j++){
- char base = bases[j % 4];
+ char base = flowOrder[j % 4];
for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
- fastaFile << base;
+ newSeq += base;
}
}
- fastaFile << endl;
+
+ fastaFile << newSeq.substr(4) << endl;
}
}
fastaFile.close();
-
+
+ outputNames.push_back(fastaFileName);
+
if(compositeFASTAFileName != ""){
m->appendFiles(fastaFileName, compositeFASTAFileName);
}
void ShhherCommand::writeNames(vector<int> otuCounts){
try {
- string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.final.names";
+ string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
ofstream nameFile;
m->openOutputFile(nameFileName, nameFile);
for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) { break; }
+
if(otuCounts[i] > 0){
nameFile << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]];
}
}
nameFile.close();
+ outputNames.push_back(nameFileName);
+
+
+ if(compositeNamesFileName != ""){
+ m->appendFiles(nameFileName, compositeNamesFileName);
+ }
}
catch(exception& e) {
m->errorOut(e, "ShhherCommand", "writeNames");
void ShhherCommand::writeGroups(){
try {
string fileRoot = flowFileName.substr(0,flowFileName.find_last_of('.'));
- string groupFileName = fileRoot + ".pn.groups";
+ string groupFileName = fileRoot + ".shhh.groups";
ofstream groupFile;
m->openOutputFile(groupFileName, groupFile);
for(int i=0;i<numSeqs;i++){
+ if (m->control_pressed) { break; }
groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
}
groupFile.close();
+ outputNames.push_back(groupFileName);
+
}
catch(exception& e) {
m->errorOut(e, "ShhherCommand", "writeGroups");
void ShhherCommand::writeClusters(vector<int> otuCounts){
try {
- string otuCountsFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.counts";
+ string otuCountsFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.counts";
ofstream otuCountsFile;
m->openOutputFile(otuCountsFileName, otuCountsFile);
- string bases = "TACG";
+ string bases = flowOrder;
for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) {
+ break;
+ }
//output the translated version of the centroid sequence for the otu
if(otuCounts[i] > 0){
int index = centroids[i];
int sequence = aaI[i][j];
otuCountsFile << seqNameVector[sequence] << '\t';
- for(int k=8;k<lengths[sequence];k++){
+ string newSeq = "";
+
+ for(int k=0;k<lengths[sequence];k++){
char base = bases[k % 4];
int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
-
+
for(int s=0;s<freq;s++){
- otuCountsFile << base;
+ newSeq += base;
+ //otuCountsFile << base;
}
}
- otuCountsFile << endl;
+ otuCountsFile << newSeq.substr(4) << endl;
}
otuCountsFile << endl;
}
}
otuCountsFile.close();
+ outputNames.push_back(otuCountsFileName);
+
}
catch(exception& e) {
m->errorOut(e, "ShhherCommand", "writeClusters");