#include "sabundvector.hpp"
#include "listvector.hpp"
#include "cluster.hpp"
-#include "sparsematrix.hpp"
#include <cfloat>
//**********************************************************************************************************************
vector<string> setParameters();
string getCommandName() { return "shhh.flows"; }
string getCommandCategory() { return "Sequence Processing"; }
- string getOutputFileNameTag(string, string);
+
string getHelpString();
- string getCitation() { return "http://www.mothur.org/wiki/Shhh.flows"; }
+ string getOutputPattern(string);
+ string getCitation() { return "Schloss PD, Gevers D, Westcott SL (2011). Reducing the effects of PCR amplification and sequencing artifacts on 16S rRNA-based studies. PLoS ONE. 6:e27310.\nQuince C, Lanzen A, Davenport RJ, Turnbaugh PJ (2011). Removing noise from pyrosequenced amplicons. BMC Bioinformatics 12:38.\nQuince C, Lanzén A, Curtis TP, Davenport RJ, Hall N, Head IM, Read LF, Sloan WT (2009). Accurate determination of microbial diversity from 454 pyrosequencing data. Nat. Methods 6:639.\nhttp://www.mothur.org/wiki/Shhh.flows"; }
string getDescription() { return "shhh.flows"; }
vector<string> flowFileVector;
vector<string> parseFlowFiles(string);
- int driver(vector<string>, string, string, int, int);
+ int driver(vector<string>, string, string);
int createProcesses(vector<string>);
int getFlowData(string, vector<string>&, vector<int>&, vector<short>&, map<string, int>&, int&);
int getUniques(int, int, vector<short>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<double>&, vector<short>&);
};
-/**************************************************************************************************/
+/**************************************************************************************************
//custom data structure for threads to use.
// This is passed by void pointer so it can be any data type
// that can be passed using a single void pointer (LPVOID).
}
};
-/**************************************************************************************************/
+/**************************************************************************************************
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
#else
static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){
int numFlowCells;
//int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
ifstream flowFile;
// cout << "herethread " << flowFileName << '\t' << &flowFile << endl;
}
}
// cout << "here" << endl;
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { return 0; }
pDataArray->m->mothurOut("Identifying unique flowgrams...\n");
//int numUniques = getUniques(numSeqs, numFlowCells, uniqueFlowgrams, uniqueCount, uniqueLengths, mapSeqToUnique, mapUniqueToSeq, lengths, flowDataPrI, flowDataIntI);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
int numUniques = 0;
uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
uniqueCount.assign(numSeqs, 0); // anWeights
}
}
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { return 0; }
double begClock = clock();
//flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
ostringstream outStream;
outStream.setf(ios::fixed, ios::floatfield);
outStream.setf(ios::dec, ios::basefield);
for(int j=0;j<i;j++){
//float flowDistance = calcPairwiseDist(numFlowCells, mapUniqueToSeq[i], mapUniqueToSeq[j], mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
int seqA = mapUniqueToSeq[i]; int seqB = mapUniqueToSeq[j];
int minLength = lengths[mapSeqToUnique[seqA]];
if(lengths[seqB] < minLength){ minLength = lengths[mapSeqToUnique[seqB]]; }
}
flowDistance /= (float) minLength;
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if(flowDistance < 1e-6){
outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
pDataArray->m->mothurOut("\t" + toString((clock()-thisbegClock)/CLOCKS_PER_SEC));
pDataArray->m->mothurOutEndLine();
}
- /*****************************************************************************************************/
+ /*****************************************************************************************************
pDataArray->m->mothurOutEndLine();
pDataArray->m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
string namesFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
//createNamesFile(numSeqs, numUniques, namesFileName, seqNameVector, mapSeqToUnique, mapUniqueToSeq);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
vector<string> duplicateNames(numUniques, "");
for(int i=0;i<numSeqs;i++){
duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
}
nameFile.close();
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { return 0; }
pDataArray->m->mothurOut("\nClustering flowgrams...\n");
string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
//cluster(listFileName, distFileName, namesFileName);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
ReadMatrix* read = new ReadColumnMatrix(distFileName);
read->setCutoff(pDataArray->cutoff);
listFileOut.close();
delete matrix; delete cluster; delete rabund; delete list;
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { return 0; }
//int numOTUs = getOTUData(numSeqs, listFileName, otuData, cumNumSeqs, nSeqsPerOTU, aaP, aaI, seqNumber, seqIndex, nameMap);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
ifstream listFile;
pDataArray->m->openInputFile(listFileName, listFile);
string label;
seqIndex = seqNumber;
listFile.close();
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { return 0; }
double cycClock = clock();
unsigned long long cycTime = time(NULL);
//fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
int indexFill = 0;
for(int i=0;i<numOTUs;i++){
indexFill++;
}
}
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { break; }
//calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
for(int i=0;i<numOTUs;i++){
if (pDataArray->m->control_pressed) { break; }
for(int k=0;k<position;k++){
// double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
int flowAValue = anL[k] * numFlowCells;
int flowBValue = nI * numFlowCells;
}
dist = dist / (double)lengths[nI];
- /*****************************************************************************************************/
+ /*****************************************************************************************************
adF[k] += dist * tauValue;
}
}
centroids[i] = -1;
}
}
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { break; }
//maxDelta = getNewWeights(numOTUs, cumNumSeqs, nSeqsPerOTU, singleTau, seqNumber, weight);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
double maxChange = 0;
for(int i=0;i<numOTUs;i++){
if(difference > maxChange){ maxChange = difference; }
}
maxDelta = maxChange;
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { break; }
//double nLL = getLikelihood(numSeqs, numOTUs, nSeqsPerOTU, seqNumber, cumNumSeqs, seqIndex, dist, weight);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
vector<long double> P(numSeqs, 0);
int effNumOTUs = 0;
}
nLL = nLL -(double)numSeqs * log(pDataArray->sigma);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { break; }
//checkCentroids(numOTUs, centroids, weight);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
vector<int> unique(numOTUs, 1);
for(int i=0;i<numOTUs;i++){
}
}
}
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { break; }
//calcNewDistances(numSeqs, numOTUs, nSeqsPerOTU, dist, weight, change, centroids, aaP, singleTau, aaI, seqNumber, seqIndex, uniqueFlowgrams, flowDataIntI, numFlowCells, lengths);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
int total = 0;
vector<double> newTau(numOTUs,0);
vector<double> norms(numSeqs, 0);
if(weight[j] > MIN_WEIGHT && change[j] == 1){
//dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i], uniqueFlowgrams, flowDataIntI, numFlowCells);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
int flowAValue = centroids[j] * numFlowCells;
int flowBValue = i * numFlowCells;
}
dist[indexOffset + j] = distTemp / (double)lengths[i];
- /*****************************************************************************************************/
+ /*****************************************************************************************************
}
}
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { break; }
pDataArray->m->mothurOut("\nFinalizing...\n");
//fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
int indexFill = 0;
for(int i=0;i<numOTUs;i++){
indexFill++;
}
}
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { break; }
//setOTUs(numOTUs, numSeqs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, otuData, singleTau, dist, aaP, aaI);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
for(int i=0;i<numOTUs;i++){
}
//fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
indexFill = 0;
for(int i=0;i<numOTUs;i++){
}
/*****************************************************************************************************/
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { break; }
for(int i=0;i<numSeqs;i++) { otuCounts[otuData[i]]++; }
//calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
for(int i=0;i<numOTUs;i++){
if (pDataArray->m->control_pressed) { break; }
for(int k=0;k<position;k++){
// double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
int flowAValue = anL[k] * numFlowCells;
int flowBValue = nI * numFlowCells;
}
dist = dist / (double)lengths[nI];
- /*****************************************************************************************************/
+ /*****************************************************************************************************
adF[k] += dist * tauValue;
}
}
}
}
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { break; }
//writeQualities(numOTUs, numFlowCells, flowFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI);
if (pDataArray->m->control_pressed) { break; }
- /*****************************************************************************************************/
+ /*****************************************************************************************************
string thisOutputDir = pDataArray->outputDir;
if (pDataArray->outputDir == "") { thisOutputDir += pDataArray->m->hasPath(flowFileName); }
string qualityFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.qual";
}
qualityFile.close();
pDataArray->outputNames.push_back(qualityFileName);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
// writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, flowFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);
if (pDataArray->m->control_pressed) { break; }
- /*****************************************************************************************************/
+ /*****************************************************************************************************
thisOutputDir = pDataArray->outputDir;
if (pDataArray->outputDir == "") { thisOutputDir += pDataArray->m->hasPath(flowFileName); }
string fastaFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.fasta";
pDataArray->m->appendFiles(fastaFileName, pDataArray->thisCompositeFASTAFileName);
}
- /*****************************************************************************************************/
+ /*****************************************************************************************************
//writeNames(thisCompositeNamesFileName, numOTUs, flowFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU);
if (pDataArray->m->control_pressed) { break; }
- /*****************************************************************************************************/
+ /*****************************************************************************************************
thisOutputDir = pDataArray->outputDir;
if (pDataArray->outputDir == "") { thisOutputDir += pDataArray->m->hasPath(flowFileName); }
string nameFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.names";
if(pDataArray->thisCompositeNameFileName != ""){
pDataArray->m->appendFiles(nameFileName, pDataArray->thisCompositeNameFileName);
}
- /*****************************************************************************************************/
+ /*****************************************************************************************************
//writeClusters(flowFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI);
if (pDataArray->m->control_pressed) { break; }
- /*****************************************************************************************************/
+ /*****************************************************************************************************
thisOutputDir = pDataArray->outputDir;
if (pDataArray->outputDir == "") { thisOutputDir += pDataArray->m->hasPath(flowFileName); }
string otuCountsFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.counts";
}
}
otuCountsFile.close();
- pDataArray->outputNames.push_back(otuCountsFileName);
- /*****************************************************************************************************/
+ pDataArray->outputNames.push_back(otuCountsFileName)
+ /*****************************************************************************************************
//writeGroups(flowFileName, numSeqs, seqNameVector);
if (pDataArray->m->control_pressed) { break; }
- /*****************************************************************************************************/
+ /*****************************************************************************************************
thisOutputDir = pDataArray->outputDir;
if (pDataArray->outputDir == "") { thisOutputDir += pDataArray->m->hasPath(flowFileName); }
string fileRoot = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName));
}
groupFile.close();
pDataArray->outputNames.push_back(groupFileName);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
pDataArray->m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
}
}
}
#endif
-
+*/
#endif