#include "sparsematrix.hpp"
#include <cfloat>
-//**********************************************************************************************************************
-
-#define NUMBINS 1000
-#define HOMOPS 10
-#define MIN_COUNT 0.1
-#define MIN_WEIGHT 0.1
-#define MIN_TAU 0.0001
-#define MIN_ITER 10
//**********************************************************************************************************************
vector<string> ShhherCommand::setParameters(){
try {
string ShhherCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The shhh.seqs command reads a file containing flowgrams and creates a file of corrected sequences.\n";
+ helpString += "The shhh.flows command reads a file containing flowgrams and creates a file of corrected sequences.\n";
return helpString;
}
catch(exception& e) {
else{
ofstream temp;
+ //flow.files = 9 character offset
compositeFASTAFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "shhh.fasta";
m->openOutputFile(compositeFASTAFileName, temp);
temp.close();
// ...at some point should added some additional type checking...
string temp;
temp = validParameter.validFile(parameters, "lookup", true);
- if (temp == "not found") { lookupFileName = "LookUp_Titanium.pat"; }
- else if(temp == "not open") { abort = true; }
- else { lookupFileName = temp; }
+ if (temp == "not found") {
+ lookupFileName = "LookUp_Titanium.pat";
+
+ int ableToOpen;
+ ifstream in;
+ ableToOpen = m->openInputFile(lookupFileName, in, "noerror");
+ in.close();
+
+ //if you can't open it, try input location
+ if (ableToOpen == 1) {
+ if (inputDir != "") { //default path is set
+ string tryPath = inputDir + lookupFileName;
+ m->mothurOut("Unable to open " + lookupFileName + ". Trying input directory " + tryPath); m->mothurOutEndLine();
+ ifstream in2;
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+ in2.close();
+ lookupFileName = tryPath;
+ }
+ }
+
+ //if you can't open it, try default location
+ if (ableToOpen == 1) {
+ if (m->getDefaultPath() != "") { //default path is set
+ string tryPath = m->getDefaultPath() + m->getSimpleName(lookupFileName);
+ m->mothurOut("Unable to open " + lookupFileName + ". Trying default " + tryPath); m->mothurOutEndLine();
+ ifstream in2;
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+ in2.close();
+ lookupFileName = tryPath;
+ }
+ }
+
+ //if you can't open it its not in current working directory or inputDir, try mothur excutable location
+ if (ableToOpen == 1) {
+ string exepath = m->argv;
+ string tempPath = exepath;
+ for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
+ exepath = exepath.substr(0, (tempPath.find_last_of('m')));
+
+ string tryPath = m->getFullPathName(exepath) + m->getSimpleName(lookupFileName);
+ m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
+ ifstream in2;
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+ in2.close();
+ lookupFileName = tryPath;
+ }
+
+ if (ableToOpen == 1) { m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true; }
+ }
+ else if(temp == "not open") {
+
+ lookupFileName = validParameter.validFile(parameters, "lookup", false);
+
+ //if you can't open it its not inputDir, try mothur excutable location
+ string exepath = m->argv;
+ string tempPath = exepath;
+ for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
+ exepath = exepath.substr(0, (tempPath.find_last_of('m')));
+
+ string tryPath = m->getFullPathName(exepath) + lookupFileName;
+ m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
+ ifstream in2;
+ int ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+ in2.close();
+ lookupFileName = tryPath;
+
+ if (ableToOpen == 1) { m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true; }
+ }else { lookupFileName = temp; }
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
- convert(temp, processors);
+ m->mothurConvert(temp, processors);
temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found"){ temp = "0.01"; }
- convert(temp, cutoff);
+ m->mothurConvert(temp, cutoff);
temp = validParameter.validFile(parameters, "mindelta", false); if (temp == "not found"){ temp = "0.000001"; }
- convert(temp, minDelta);
+ m->mothurConvert(temp, minDelta);
temp = validParameter.validFile(parameters, "maxiter", false); if (temp == "not found"){ temp = "1000"; }
- convert(temp, maxIters);
+ m->mothurConvert(temp, maxIters);
temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found") { temp = "60"; }
- convert(temp, sigma);
+ m->mothurConvert(temp, sigma);
flowOrder = validParameter.validFile(parameters, "order", false);
if (flowOrder == "not found"){ flowOrder = "TACG"; }
processors = ncpus;
m->mothurOut("\nGetting preliminary data...\n");
- getSingleLookUp();
- getJointLookUp();
+ getSingleLookUp(); if (m->control_pressed) { return 0; }
+ getJointLookUp(); if (m->control_pressed) { return 0; }
vector<string> flowFileVector;
if(flowFilesFileName != "not found"){
ifstream flowFilesFile;
m->openInputFile(flowFilesFileName, flowFilesFile);
while(flowFilesFile){
- flowFilesFile >> fName;
+ fName = m->getline(flowFilesFile);
flowFileVector.push_back(fName);
m->gobble(flowFilesFile);
}
}
for(int i=0;i<numFiles;i++){
+
+ if (m->control_pressed) { break; }
+
double begClock = clock();
- unsigned long int begTime = time(NULL);
+ unsigned long long begTime = time(NULL);
flowFileName = flowFileVector[i];
m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n");
m->mothurOut("Reading flowgrams...\n");
getFlowData();
+
+ if (m->control_pressed) { break; }
m->mothurOut("Identifying unique flowgrams...\n");
getUniques();
+
+ if (m->control_pressed) { break; }
m->mothurOut("Calculating distances between flowgrams...\n");
char fileName[1024];
string distFileName = flowDistMPI(0, int(sqrt(1.0/float(ncpus)) * numUniques));
+ if (m->control_pressed) { break; }
+
int done;
for(int i=1;i<ncpus;i++){
MPI_Recv(&done, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
m->appendFiles((distFileName + ".temp." + toString(i)), distFileName);
- remove((distFileName + ".temp." + toString(i)).c_str());
+ m->mothurRemove((distFileName + ".temp." + toString(i)));
}
string namesFileName = createNamesFile();
+ if (m->control_pressed) { break; }
+
m->mothurOut("\nClustering flowgrams...\n");
string listFileName = cluster(distFileName, namesFileName);
-
+
+ if (m->control_pressed) { break; }
+
getOTUData(listFileName);
- remove(distFileName.c_str());
- remove(namesFileName.c_str());
- remove(listFileName.c_str());
+ m->mothurRemove(distFileName);
+ m->mothurRemove(namesFileName);
+ m->mothurRemove(listFileName);
+
+ if (m->control_pressed) { break; }
initPyroCluster();
+ if (m->control_pressed) { break; }
+
for(int i=1;i<ncpus;i++){
MPI_Send(&numOTUs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
MPI_Send(&singleLookUp[0], singleLookUp.size(), MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
MPI_Send(&sigma, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
}
+ if (m->control_pressed) { break; }
double maxDelta = 0;
int iter = 0;
m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
-
+
double cycClock = clock();
- unsigned long int cycTime = time(NULL);
+ unsigned long long cycTime = time(NULL);
fill();
+
+ if (m->control_pressed) { break; }
int total = singleTau.size();
for(int i=1;i<ncpus;i++){
}
}
- maxDelta = getNewWeights();
- double nLL = getLikelihood();
- checkCentroids();
+ maxDelta = getNewWeights(); if (m->control_pressed) { break; }
+ double nLL = getLikelihood(); if (m->control_pressed) { break; }
+ checkCentroids(); if (m->control_pressed) { break; }
for(int i=1;i<ncpus;i++){
MPI_Send(¢roids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
}
+ if (m->control_pressed) { break; }
+
m->mothurOut("\nFinalizing...\n");
fill();
+
+ if (m->control_pressed) { break; }
+
setOTUs();
vector<int> otuCounts(numOTUs, 0);
for(int i=0;i<numSeqs;i++) { otuCounts[otuData[i]]++; }
calcCentroidsDriver(0, numOTUs);
- writeQualities(otuCounts);
- writeSequences(otuCounts);
- writeNames(otuCounts);
- writeClusters(otuCounts);
- writeGroups();
+ if (m->control_pressed) { break; }
+
+ writeQualities(otuCounts); if (m->control_pressed) { break; }
+ writeSequences(otuCounts); if (m->control_pressed) { break; }
+ writeNames(otuCounts); if (m->control_pressed) { break; }
+ writeClusters(otuCounts); if (m->control_pressed) { break; }
+ writeGroups(); if (m->control_pressed) { break; }
m->mothurOut("Total time to process " + toString(flowFileName) + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
MPI_Recv(&numFiles, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
for(int i=0;i<numFiles;i++){
+
+ if (m->control_pressed) { break; }
+
//Now into the pyrodist part
bool live = 1;
int flowDistEnd = int(sqrt(float(pid+1)/float(ncpus)) * numUniques);
string distanceStringChild = flowDistMPI(flowDistStart, flowDistEnd);
-
+
+ if (m->control_pressed) { break; }
+
int done = 1;
MPI_Send(&done, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
while(live){
+ if (m->control_pressed) { break; }
+
MPI_Recv(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
singleTau.assign(total, 0.0000);
seqNumber.assign(total, 0);
MPI_Recv(&live, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
}
}
- }
+ }
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
MPI_Barrier(MPI_COMM_WORLD);
double begClock = clock();
for(int i=startSeq;i<stopSeq;i++){
+
+ if (m->control_pressed) { break; }
+
for(int j=0;j<i;j++){
float flowDistance = calcPairwiseDist(mapUniqueToSeq[i], mapUniqueToSeq[j]);
}
}
- m->mothurOut(toString(stopSeq) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
-
string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
if(pid != 0){ fDistFileName += ".temp." + toString(pid); }
+
+ if (m->control_pressed) { return fDistFileName; }
+
+ m->mothurOut(toString(stopSeq) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
ofstream distFile(fDistFileName.c_str());
distFile << outStream.str();
try {
if (abort == true) { return 0; }
- getSingleLookUp();
- getJointLookUp();
+ getSingleLookUp(); if (m->control_pressed) { return 0; }
+ getJointLookUp(); if (m->control_pressed) { return 0; }
+
vector<string> flowFileVector;
if(flowFilesFileName != "not found"){
string fName;
ifstream flowFilesFile;
m->openInputFile(flowFilesFileName, flowFilesFile);
while(flowFilesFile){
- flowFilesFile >> fName;
+ fName = m->getline(flowFilesFile);
flowFileVector.push_back(fName);
m->gobble(flowFilesFile);
}
for(int i=0;i<numFiles;i++){
+
+ if (m->control_pressed) { break; }
+
flowFileName = flowFileVector[i];
m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n");
m->mothurOut("Reading flowgrams...\n");
getFlowData();
+ if (m->control_pressed) { break; }
+
m->mothurOut("Identifying unique flowgrams...\n");
getUniques();
+ if (m->control_pressed) { break; }
m->mothurOut("Calculating distances between flowgrams...\n");
string distFileName = createDistFile(processors);
string namesFileName = createNamesFile();
-
+
+ if (m->control_pressed) { break; }
+
m->mothurOut("\nClustering flowgrams...\n");
string listFileName = cluster(distFileName, namesFileName);
+
+ if (m->control_pressed) { break; }
+
getOTUData(listFileName);
- remove(distFileName.c_str());
- remove(namesFileName.c_str());
- remove(listFileName.c_str());
+ if (m->control_pressed) { break; }
+
+ m->mothurRemove(distFileName);
+ m->mothurRemove(namesFileName);
+ m->mothurRemove(listFileName);
initPyroCluster();
+ if (m->control_pressed) { break; }
+
double maxDelta = 0;
int iter = 0;
double begClock = clock();
- unsigned long int begTime = time(NULL);
+ unsigned long long begTime = time(NULL);
m->mothurOut("\nDenoising flowgrams...\n");
m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
+ if (m->control_pressed) { break; }
+
double cycClock = clock();
- unsigned long int cycTime = time(NULL);
+ unsigned long long cycTime = time(NULL);
fill();
+ if (m->control_pressed) { break; }
+
calcCentroids();
- maxDelta = getNewWeights();
- double nLL = getLikelihood();
+ if (m->control_pressed) { break; }
+
+ maxDelta = getNewWeights(); if (m->control_pressed) { break; }
+ double nLL = getLikelihood(); if (m->control_pressed) { break; }
checkCentroids();
+ if (m->control_pressed) { break; }
+
calcNewDistances();
-
+
+ if (m->control_pressed) { break; }
+
iter++;
m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
}
+ if (m->control_pressed) { break; }
+
m->mothurOut("\nFinalizing...\n");
fill();
+
+ if (m->control_pressed) { break; }
+
setOTUs();
+ if (m->control_pressed) { break; }
+
vector<int> otuCounts(numOTUs, 0);
for(int i=0;i<numSeqs;i++) { otuCounts[otuData[i]]++; }
- calcCentroidsDriver(0, numOTUs);
- writeQualities(otuCounts);
- writeSequences(otuCounts);
- writeNames(otuCounts);
- writeClusters(otuCounts);
- writeGroups();
+ calcCentroidsDriver(0, numOTUs); if (m->control_pressed) { break; }
+ writeQualities(otuCounts); if (m->control_pressed) { break; }
+ writeSequences(otuCounts); if (m->control_pressed) { break; }
+ writeNames(otuCounts); if (m->control_pressed) { break; }
+ writeClusters(otuCounts); if (m->control_pressed) { break; }
+ writeGroups(); if (m->control_pressed) { break; }
m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
}
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+
if(compositeFASTAFileName != ""){
outputNames.push_back(compositeFASTAFileName);
outputNames.push_back(compositeNamesFileName);
flowFile >> numFlowCells;
int index = 0;//pcluster
while(!flowFile.eof()){
+
+ if (m->control_pressed) { break; }
+
flowFile >> seqName >> currentNumFlowCells;
lengths.push_back(currentNumFlowCells);
numSeqs = seqNameVector.size();
for(int i=0;i<numSeqs;i++){
+
+ if (m->control_pressed) { break; }
+
int iNumFlowCells = i * numFlowCells;
for(int j=lengths[i];j<numFlowCells;j++){
flowDataIntI[iNumFlowCells + j] = 0;
m->openInputFile(lookupFileName, lookUpFile);
for(int i=0;i<HOMOPS;i++){
+
+ if (m->control_pressed) { break; }
+
float logFracFreq;
lookUpFile >> logFracFreq;
jointLookUp.resize(NUMBINS * NUMBINS, 0);
for(int i=0;i<NUMBINS;i++){
+
+ if (m->control_pressed) { break; }
+
for(int j=0;j<NUMBINS;j++){
double minSum = 100000000;
for(int i=0;i<HOMOPS;i++){//loop signal strength
+
+ if (m->control_pressed) { break; }
+
float negLogProb = singleLookUp[i * NUMBINS + intIntensity];
if(negLogProb < minNegLogProb) { minNegLogProb = negLogProb; }
}
vector<short> uniqueFlowDataIntI(numFlowCells * numSeqs, -1);
for(int i=0;i<numSeqs;i++){
+
+ if (m->control_pressed) { break; }
+
int index = 0;
vector<short> current(numFlowCells);
uniqueLengths.resize(numUniques);
flowDataPrI.resize(numSeqs * numFlowCells, 0);
- for(int i=0;i<flowDataPrI.size();i++) { flowDataPrI[i] = getProbIntensity(flowDataIntI[i]); }
+ for(int i=0;i<flowDataPrI.size();i++) { if (m->control_pressed) { break; } flowDataPrI[i] = getProbIntensity(flowDataIntI[i]); }
}
catch(exception& e) {
m->errorOut(e, "ShhherCommand", "getUniques");
float dist = 0;
for(int i=0;i<minLength;i++){
+
+ if (m->control_pressed) { break; }
+
int flowAIntI = flowDataIntI[ANumFlowCells + i];
float flowAPrI = flowDataPrI[ANumFlowCells + i];
double begClock = clock();
for(int i=startSeq;i<stopSeq;i++){
+
+ if (m->control_pressed) { break; }
+
for(int j=0;j<i;j++){
float flowDistance = calcPairwiseDist(mapUniqueToSeq[i], mapUniqueToSeq[j]);
m->mothurOutEndLine();
}
}
- m->mothurOut(toString(stopSeq-1) + "\t" + toString(time(NULL) - begTime));
- m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
- m->mothurOutEndLine();
ofstream distFile(distFileName.c_str());
distFile << outStream.str();
distFile.close();
+
+ if (m->control_pressed) {}
+ else {
+ m->mothurOut(toString(stopSeq-1) + "\t" + toString(time(NULL) - begTime));
+ m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
+ m->mothurOutEndLine();
+ }
}
catch(exception& e) {
m->errorOut(e, "ShhherCommand", "flowDistParentFork");
string ShhherCommand::createDistFile(int processors){
try{
+//////////////////////// until I figure out the shared memory issue //////////////////////
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#else
+ processors=1;
+#endif
+//////////////////////// until I figure out the shared memory issue //////////////////////
+
string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
- unsigned long int begTime = time(NULL);
+ unsigned long long begTime = time(NULL);
double begClock = clock();
-
- vector<int> start;
- vector<int> end;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ if (numSeqs < processors){ processors = 1; }
+
if(processors == 1) { flowDistParentFork(fDistFileName, 0, numUniques); }
+
else{ //you have multiple processors
- if (numSeqs < processors){ processors = 1; }
-
vector<int> start(processors, 0);
vector<int> end(processors, 0);
+ int process = 1;
+ vector<int> processIDs;
+
for (int i = 0; i < processors; i++) {
start[i] = int(sqrt(float(i)/float(processors)) * numUniques);
end[i] = int(sqrt(float(i+1)/float(processors)) * numUniques);
}
- int process = 1;
- vector<int> processIDs;
-
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+
//loop through and create all the processes you want
while (process != processors) {
int pid = fork();
int temp = processIDs[i];
wait(&temp);
}
+#else
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+ //Windows version shared memory, so be careful when passing variables through the flowDistParentForkData struct.
+ //Above fork() will clone, so memory is separate, but that's not the case with windows,
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ vector<flowDistParentForkData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+
+ //Create processor worker threads.
+ for(int i = 0; i < processors-1; i++){
+ // Allocate memory for thread data.
+ string extension = extension = toString(i) + ".temp";
+
+ flowDistParentForkData* tempdist = new flowDistParentForkData((fDistFileName + extension), mapUniqueToSeq, mapSeqToUnique, lengths, flowDataIntI, flowDataPrI, jointLookUp, m, start[i+1], end[i+1], numFlowCells, cutoff, i);
+ pDataArray.push_back(tempdist);
+ processIDs.push_back(i);
+
+ //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
+ //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+ hThreadArray[i] = CreateThread(NULL, 0, MyflowDistParentForkThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
+ }
+
+ //parent does its part
+ flowDistParentFork(fDistFileName, start[0], end[0]);
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
+
+#endif
//append and remove temp files
for (int i=0;i<processIDs.size();i++) {
m->appendFiles((fDistFileName + toString(processIDs[i]) + ".temp"), fDistFileName);
- remove((fDistFileName + toString(processIDs[i]) + ".temp").c_str());
+ m->mothurRemove((fDistFileName + toString(processIDs[i]) + ".temp"));
}
}
-#else
- flowDistParentFork(fDistFileName, 0, numUniques);
-#endif
-
m->mothurOutEndLine();
-
m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
-
return fDistFileName;
}
catch(exception& e) {
m->openOutputFile(nameFileName, nameFile);
for(int i=0;i<numUniques;i++){
+
+ if (m->control_pressed) { break; }
+
// nameFile << seqNameVector[mapUniqueToSeq[i]] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
}
double clusterCutoff = cutoff;
while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
+
+ if (m->control_pressed) { break; }
+
cluster->update(clusterCutoff);
}
string singleOTU = "";
for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) { break; }
listFile >> singleOTU;
void ShhherCommand::initPyroCluster(){
try{
+ if (numOTUs < processors) { processors = 1; }
+
dist.assign(numSeqs * numOTUs, 0);
change.assign(numOTUs, 1);
centroids.assign(numOTUs, -1);
try {
int index = 0;
for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) { break; }
+
cumNumSeqs[i] = index;
for(int j=0;j<nSeqsPerOTU[i];j++){
seqNumber[index] = aaP[i][j];
try{
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-
+
if(processors == 1) {
calcCentroidsDriver(0, numOTUs);
}
try{
-
for(int i=start;i<finish;i++){
+ if (m->control_pressed) { break; }
+
double count = 0;
int position = 0;
int minFlowGram = 100000000;
int flowBValue = flow * numFlowCells;
double dist = 0;
-
+
for(int i=0;i<length;i++){
dist += singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
flowAValue++;
for(int i=0;i<numOTUs;i++){
+ if (m->control_pressed) { break; }
+
double difference = weight[i];
weight[i] = 0;
string hold;
for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) { break; }
+
for(int j=0;j<nSeqsPerOTU[i];j++){
int index = cumNumSeqs[i] + j;
int nI = seqIndex[index];
}
for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) { break; }
+
if(unique[i] == 1){
for(int j=i+1;j<numOTUs;j++){
if(unique[j] == 1){
try{
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-
+
if(processors == 1) {
calcNewDistancesParent(0, numSeqs);
}
exit(0);
}
}
-
+
//parent does its part
calcNewDistancesParent(nSeqsBreaks[0], nSeqsBreaks[1]);
int total = seqIndex.size();
seqIndex.clear();
singleTau.clear();
-
-
for(int i=startSeq;i<stopSeq;i++){
+
+ if (m->control_pressed) { break; }
+
double offset = 1e8;
int indexOffset = i * numOTUs;
child_singleTau.resize(0);
for(int i=startSeq;i<stopSeq;i++){
+
+ if (m->control_pressed) { break; }
+
double offset = 1e8;
int indexOffset = i * numOTUs;
vector<double> newTau(numOTUs,0);
vector<double> norms(numSeqs, 0);
nSeqsPerOTU.assign(numOTUs, 0);
-
+
for(int i=startSeq;i<stopSeq;i++){
- int indexOffset = i * numOTUs;
+ if (m->control_pressed) { break; }
+
+ int indexOffset = i * numOTUs;
+
double offset = 1e8;
for(int j=0;j<numOTUs;j++){
+
if(weight[j] > MIN_WEIGHT && change[j] == 1){
dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
}
-
+
if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
offset = dist[indexOffset + j];
}
}
-
+
for(int j=0;j<numOTUs;j++){
if(weight[j] > MIN_WEIGHT){
newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
newTau[j] = 0.0;
}
}
-
+
for(int j=0;j<numOTUs;j++){
newTau[j] /= norms[i];
}
-
+
for(int j=0;j<numOTUs;j++){
if(newTau[j] > MIN_TAU){
nSeqsPerOTU[j]++;
}
}
+
}
+
}
catch(exception& e) {
m->errorOut(e, "ShhherCommand", "calcNewDistancesParent");
vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) { break; }
+
for(int j=0;j<nSeqsPerOTU[i];j++){
int index = cumNumSeqs[i] + j;
double tauValue = singleTau[seqNumber[index]];
void ShhherCommand::writeQualities(vector<int> otuCounts){
try {
- string qualityFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.qual";
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
+ string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.qual";
ofstream qualityFile;
m->openOutputFile(qualityFileName, qualityFile);
for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) { break; }
+
int index = 0;
int base = 0;
void ShhherCommand::writeSequences(vector<int> otuCounts){
try {
-
- string fastaFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.fasta";
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
+ string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.fasta";
ofstream fastaFile;
m->openOutputFile(fastaFileName, fastaFile);
vector<string> names(numOTUs, "");
for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) { break; }
+
int index = centroids[i];
if(otuCounts[i] > 0){
void ShhherCommand::writeNames(vector<int> otuCounts){
try {
- string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
+ string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.names";
ofstream nameFile;
m->openOutputFile(nameFileName, nameFile);
for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) { break; }
+
if(otuCounts[i] > 0){
nameFile << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]];
void ShhherCommand::writeGroups(){
try {
- string fileRoot = flowFileName.substr(0,flowFileName.find_last_of('.'));
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
+ string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
string groupFileName = fileRoot + ".shhh.groups";
ofstream groupFile;
m->openOutputFile(groupFileName, groupFile);
for(int i=0;i<numSeqs;i++){
+ if (m->control_pressed) { break; }
groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
}
groupFile.close();
void ShhherCommand::writeClusters(vector<int> otuCounts){
try {
- string otuCountsFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.counts";
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
+ string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.counts";
ofstream otuCountsFile;
m->openOutputFile(otuCountsFileName, otuCountsFile);
string bases = flowOrder;
for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) {
+ break;
+ }
//output the translated version of the centroid sequence for the otu
if(otuCounts[i] > 0){
int index = centroids[i];