}
}
//**********************************************************************************************************************
+string ShhherCommand::getOutputFileNameTag(string type, string inputName=""){ // Maps an output type key ("fasta", "name", "group", "counts", "qfile") to the filename tag appended to output file names; inputName is not read anywhere in this body.
+ try {
+ string outputFileName = ""; // stays empty on both error paths below, so callers receive "" for an unsupported type
+ map<string, vector<string> >::iterator it;
+
+ //Is this a type that this command creates? (i.e. is it a key of outputTypes)
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } // unknown key: only logs; unlike the branch at the end of the chain below, control_pressed is not set here
+ else {
+ if (type == "fasta") { outputFileName = "shhh.fasta"; } // tags carry no leading '.'; NOTE(review): presumably the root name they are appended to already ends with one - confirm against getRootName
+ else if (type == "name") { outputFileName = "shhh.names"; }
+ else if (type == "group") { outputFileName = "shhh.groups"; }
+ else if (type == "counts") { outputFileName = "shhh.counts"; }
+ else if (type == "qfile") { outputFileName = "shhh.qual"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // key exists in outputTypes but has no tag defined here: log and set the control_pressed flag
+ }
+ return outputFileName; // the tag for a supported type, or "" if an error was logged
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ShhherCommand", "getOutputFileNameTag"); // report the exception with class and method name
+ exit(1); // any std::exception here terminates the process rather than propagating to the caller
+ }
+}
+//**********************************************************************************************************************
ShhherCommand::ShhherCommand(){
try {
setParameters();
//initialize outputTypes
-// vector<string> tempOutNames;
-// outputTypes["pn.dist"] = tempOutNames;
+ vector<string> tempOutNames;
+ outputTypes["fasta"] = tempOutNames;
+ outputTypes["name"] = tempOutNames;
+ outputTypes["group"] = tempOutNames;
+ outputTypes["counts"] = tempOutNames;
+ outputTypes["qfile"] = tempOutNames;
}
catch(exception& e) {
}
//initialize outputTypes
- vector<string> tempOutNames;
-// outputTypes["pn.dist"] = tempOutNames;
- // outputTypes["fasta"] = tempOutNames;
+ vector<string> tempOutNames;
+ outputTypes["fasta"] = tempOutNames;
+ outputTypes["name"] = tempOutNames;
+ outputTypes["group"] = tempOutNames;
+ outputTypes["counts"] = tempOutNames;
+ outputTypes["qfile"] = tempOutNames;
+
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
if(compositeFASTAFileName != ""){
- outputNames.push_back(compositeFASTAFileName);
- outputNames.push_back(compositeNamesFileName);
+ outputNames.push_back(compositeFASTAFileName); outputTypes["fasta"].push_back(compositeFASTAFileName);
+ outputNames.push_back(compositeNamesFileName); outputTypes["name"].push_back(compositeNamesFileName);
}
m->mothurOutEndLine();
duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
}
- string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
+ string nameFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("name");
ofstream nameFile;
m->openOutputFile(nameFileName, nameFile);
try{
if (numOTUs < processors) { processors = 1; }
+ if (m->debug) { m->mothurOut("[DEBUG]: numSeqs = " + toString(numSeqs) + " numOTUS = " + toString(numOTUs) + " about to alloc a dist vector with size = " + toString((numSeqs * numOTUs)) + ".\n"); }
+
dist.assign(numSeqs * numOTUs, 0);
change.assign(numOTUs, 1);
centroids.assign(numOTUs, -1);
nSeqsBreaks.assign(processors+1, 0);
nOTUsBreaks.assign(processors+1, 0);
+ if (m->debug) { m->mothurOut("[DEBUG]: made it through the memory allocation.\n"); }
+
nSeqsBreaks[0] = 0;
for(int i=0;i<processors;i++){
nSeqsBreaks[i+1] = nSeqsBreaks[i] + (int)((double) numSeqs / (double) processors);
float intensity;
- flowFile >> numFlowCells;
+ string numFlowTest;
+ flowFile >> numFlowTest;
+
+ if (!m->isContainingOnlyDigits(numFlowTest)) { m->mothurOut("[ERROR]: expected a number and got " + numFlowTest + ", quitting. Did you use the flow parameter instead of the file parameter?"); m->mothurOutEndLine(); exit(1); }
+ else { convert(numFlowTest, numFlowCells); }
+
int index = 0;//pcluster
while(!flowFile.eof()){
try {
ReadMatrix* read = new ReadColumnMatrix(distFileName);
- read->setCutoff(cutoff);
-
- NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
- clusterNameMap->readMap();
- read->read(clusterNameMap);
-
- ListVector* list = read->getListVector();
- SparseMatrix* matrix = read->getMatrix();
+ read->setCutoff(cutoff);
+
+ NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
+ clusterNameMap->readMap();
+ read->read(clusterNameMap);
- delete read;
- delete clusterNameMap;
+ ListVector* list = read->getListVector();
+ SparseDistanceMatrix* matrix = read->getDMatrix();
+
+ delete read;
+ delete clusterNameMap;
RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
- string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.qual";
+ string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("qfile");
ofstream qualityFile;
m->openOutputFile(qualityFileName, qualityFile);
}
}
qualityFile.close();
- outputNames.push_back(qualityFileName);
+ outputNames.push_back(qualityFileName); outputTypes["qfile"].push_back(qualityFileName);
}
catch(exception& e) {
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
- string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.fasta";
+ string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("fasta");
ofstream fastaFile;
m->openOutputFile(fastaFileName, fastaFile);
}
fastaFile.close();
- outputNames.push_back(fastaFileName);
+ outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName);
if(compositeFASTAFileName != ""){
m->appendFiles(fastaFileName, compositeFASTAFileName);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
- string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.names";
+ string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("name");
ofstream nameFile;
m->openOutputFile(nameFileName, nameFile);
}
}
nameFile.close();
- outputNames.push_back(nameFileName);
+ outputNames.push_back(nameFileName); outputTypes["name"].push_back(nameFileName);
if(compositeNamesFileName != ""){
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
- string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
- string groupFileName = fileRoot + "shhh.groups";
+ string fileRoot = m->getRootName(m->getSimpleName(flowFileName));
+ int pos = fileRoot.find_first_of('.');
+ string fileGroup = fileRoot;
+ if (pos != string::npos) { fileGroup = fileRoot.substr(pos+1, (fileRoot.length()-1-(pos+1))); }
+ string groupFileName = thisOutputDir + fileRoot + getOutputFileNameTag("group");
ofstream groupFile;
m->openOutputFile(groupFileName, groupFile);
for(int i=0;i<numSeqs;i++){
if (m->control_pressed) { break; }
- groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
+ groupFile << seqNameVector[i] << '\t' << fileGroup << endl;
}
groupFile.close();
- outputNames.push_back(groupFileName);
+ outputNames.push_back(groupFileName); outputTypes["group"].push_back(groupFileName);
}
catch(exception& e) {
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
- string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.counts";
+ string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) +getOutputFileNameTag("counts");
ofstream otuCountsFile;
m->openOutputFile(otuCountsFileName, otuCountsFile);
}
}
otuCountsFile.close();
- outputNames.push_back(otuCountsFileName);
+ outputNames.push_back(otuCountsFileName); outputTypes["counts"].push_back(otuCountsFileName);
}
catch(exception& e) {
int ShhherCommand::execute(){
try {
- if (abort == true) { return 0; }
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
getSingleLookUp(); if (m->control_pressed) { return 0; }
getJointLookUp(); if (m->control_pressed) { return 0; }
#endif
if(compositeFASTAFileName != ""){
- outputNames.push_back(compositeFASTAFileName);
- outputNames.push_back(compositeNamesFileName);
+ outputNames.push_back(compositeFASTAFileName); outputTypes["fasta"].push_back(compositeFASTAFileName);
+ outputNames.push_back(compositeNamesFileName); outputTypes["name"].push_back(compositeNamesFileName);
}
m->mothurOutEndLine();
//divide the groups between the processors
vector<linePair> lines;
+ vector<int> numFilesToComplete;
int numFilesPerProcessor = filenames.size() / processors;
for (int i = 0; i < processors; i++) {
int startIndex = i * numFilesPerProcessor;
int endIndex = (i+1) * numFilesPerProcessor;
if(i == (processors - 1)){ endIndex = filenames.size(); }
lines.push_back(linePair(startIndex, endIndex));
+ numFilesToComplete.push_back((endIndex-startIndex));
}
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
process++;
}else if (pid == 0){
num = driver(filenames, compositeFASTAFileName + toString(getpid()) + ".temp", compositeNamesFileName + toString(getpid()) + ".temp", lines[process].start, lines[process].end);
+
+ //pass numSeqs to parent
+ ofstream out;
+ string tempFile = compositeFASTAFileName + toString(getpid()) + ".num.temp";
+ m->openOutputFile(tempFile, out);
+ out << num << endl;
+ out.close();
+
exit(0);
}else {
m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
//Windows version shared memory, so be careful when passing variables through the shhhFlowsData struct.
//Above fork() will clone, so memory is separate, but that's not the case with windows,
//////////////////////////////////////////////////////////////////////////////////////////////////////
-
+ /*
vector<shhhFlowsData*> pDataArray;
DWORD dwThreadIdArray[processors-1];
HANDLE hThreadArray[processors-1];
CloseHandle(hThreadArray[i]);
delete pDataArray[i];
}
-
+ */
#endif
for (int i=0;i<processIDS.size();i++) {
+ ifstream in;
+ string tempFile = compositeFASTAFileName + toString(processIDS[i]) + ".num.temp";
+ m->openInputFile(tempFile, in);
+ if (!in.eof()) {
+ int tempNum = 0;
+ in >> tempNum;
+ if (tempNum != numFilesToComplete[i+1]) {
+ m->mothurOut("[ERROR]: main process expected " + toString(processIDS[i]) + " to complete " + toString(numFilesToComplete[i+1]) + " files, and it only reported completing " + toString(tempNum) + ". This will cause file mismatches. The flow files may be too large to process with multiple processors. \n");
+ }
+ }
+ in.close(); m->mothurRemove(tempFile);
+
if (compositeFASTAFileName != "") {
m->appendFiles((compositeFASTAFileName + toString(processIDS[i]) + ".temp"), compositeFASTAFileName);
m->appendFiles((compositeNamesFileName + toString(processIDS[i]) + ".temp"), compositeNamesFileName);
int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFileName, string thisCompositeNamesFileName, int start, int end){
try {
+ int numCompleted = 0;
+
for(int i=start;i<end;i++){
if (m->control_pressed) { break; }
vector<int> uniqueLengths;
int numFlowCells;
+ if (m->debug) { m->mothurOut("[DEBUG]: About to read flowgrams.\n"); }
int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells);
if (m->control_pressed) { break; }
vector<int> nSeqsBreaks;
vector<int> nOTUsBreaks;
+ if (m->debug) { m->mothurOut("[DEBUG]: numSeqs = " + toString(numSeqs) + " numOTUS = " + toString(numOTUs) + " about to alloc a dist vector with size = " + toString((numSeqs * numOTUs)) + ".\n"); }
+
dist.assign(numSeqs * numOTUs, 0);
change.assign(numOTUs, 1);
centroids.assign(numOTUs, -1);
nSeqsBreaks[1] = numSeqs;
nOTUsBreaks[1] = numOTUs;
+ if (m->debug) { m->mothurOut("[DEBUG]: done allocating memory, about to denoise.\n"); }
+
if (m->control_pressed) { break; }
double maxDelta = 0;
if (m->control_pressed) { break; }
vector<int> otuCounts(numOTUs, 0);
- for(int i=0;i<numSeqs;i++) { otuCounts[otuData[i]]++; }
+ for(int j=0;j<numSeqs;j++) { otuCounts[otuData[j]]++; }
calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
if ((large) && (g == 0)) { flowFileName = filenames[i]; theseFlowFileNames[0] = filenames[i]; }
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir = m->hasPath(flowFileName); }
- string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.qual";
- string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.fasta";
- string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.names";
- string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.counts";
- string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
- string groupFileName = fileRoot + "shhh.groups";
+ string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("qfile");
+ string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("fasta");
+ string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("name");
+ string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("counts");
+ string fileRoot = m->getRootName(m->getSimpleName(flowFileName));
+ int pos = fileRoot.find_first_of('.');
+ string fileGroup = fileRoot;
+ if (pos != string::npos) { fileGroup = fileRoot.substr(pos+1, (fileRoot.length()-1-(pos+1))); }
+ string groupFileName = thisOutputDir + fileRoot + getOutputFileNameTag("group");
writeQualities(numOTUs, numFlowCells, qualityFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); if (m->control_pressed) { break; }
writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, fastaFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);if (m->control_pressed) { break; }
writeNames(thisCompositeNamesFileName, numOTUs, nameFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU); if (m->control_pressed) { break; }
writeClusters(otuCountsFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI); if (m->control_pressed) { break; }
- writeGroups(groupFileName, fileRoot, numSeqs, seqNameVector); if (m->control_pressed) { break; }
+ writeGroups(groupFileName, fileGroup, numSeqs, seqNameVector); if (m->control_pressed) { break; }
if (large) {
if (g > 0) {
- m->appendFiles(qualityFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.qual"));
+ m->appendFiles(qualityFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + getOutputFileNameTag("qfile")));
m->mothurRemove(qualityFileName);
- m->appendFiles(fastaFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.fasta"));
+ m->appendFiles(fastaFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + getOutputFileNameTag("fasta")));
m->mothurRemove(fastaFileName);
- m->appendFiles(nameFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.names"));
+ m->appendFiles(nameFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + getOutputFileNameTag("name")));
m->mothurRemove(nameFileName);
- m->appendFiles(otuCountsFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.counts"));
+ m->appendFiles(otuCountsFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + getOutputFileNameTag("counts")));
m->mothurRemove(otuCountsFileName);
- m->appendFiles(groupFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.groups"));
+ m->appendFiles(groupFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + getOutputFileNameTag("group")));
m->mothurRemove(groupFileName);
}
m->mothurRemove(theseFlowFileNames[g]);
}
}
+ numCompleted++;
m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
}
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
- return 0;
+ return numCompleted;
}catch(exception& e) {
m->errorOut(e, "ShhherCommand", "driver");
thisFlowDataIntI.clear();
thisNameMap.clear();
- flowFile >> numFlowCells;
+ string numFlowTest;
+ flowFile >> numFlowTest;
+
+ if (!m->isContainingOnlyDigits(numFlowTest)) { m->mothurOut("[ERROR]: expected a number and got " + numFlowTest + ", quitting. Did you use the flow parameter instead of the file parameter?"); m->mothurOutEndLine(); exit(1); }
+ else { convert(numFlowTest, numFlowCells); }
+
+ if (m->debug) { m->mothurOut("[DEBUG]: numFlowCells = " + toString(numFlowCells) + ".\n"); }
int index = 0;//pcluster
while(!flowFile.eof()){
if (m->control_pressed) { break; }
flowFile >> seqName >> currentNumFlowCells;
+
thisLengths.push_back(currentNumFlowCells);
thisSeqNameVector.push_back(seqName);
thisNameMap[seqName] = index++;//pcluster
-
+
+ if (m->debug) { m->mothurOut("[DEBUG]: seqName = " + seqName + " length = " + toString(currentNumFlowCells) + " index = " + toString(index) + "\n"); }
+
for(int i=0;i<numFlowCells;i++){
flowFile >> intensity;
if(intensity > 9.99) { intensity = 9.99; }
read->read(clusterNameMap);
ListVector* list = read->getListVector();
- SparseMatrix* matrix = read->getMatrix();
+ SparseDistanceMatrix* matrix = read->getDMatrix();
delete read;
delete clusterNameMap;
listFile >> label >> numOTUs;
+ if (m->debug) { m->mothurOut("[DEBUG]: Getting OTU Data...\n"); }
+
otuData.assign(numSeqs, 0);
cumNumSeqs.assign(numOTUs, 0);
nSeqsPerOTU.assign(numOTUs, 0);
for(int i=0;i<numOTUs;i++){
if (m->control_pressed) { break; }
+ if (m->debug) { m->mothurOut("[DEBUG]: processing OTU " + toString(i) + ".\n"); }
listFile >> singleOTU;
}
}
qualityFile.close();
- outputNames.push_back(qualityFileName);
+ outputNames.push_back(qualityFileName); outputTypes["qfile"].push_back(qualityFileName);
}
catch(exception& e) {
}
fastaFile.close();
- outputNames.push_back(fastaFileName);
+ outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName);
if(thisCompositeFASTAFileName != ""){
m->appendFiles(fastaFileName, thisCompositeFASTAFileName);
}
}
nameFile.close();
- outputNames.push_back(nameFileName);
+ outputNames.push_back(nameFileName); outputTypes["name"].push_back(nameFileName);
if(thisCompositeNamesFileName != ""){
groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
}
groupFile.close();
- outputNames.push_back(groupFileName);
+ outputNames.push_back(groupFileName); outputTypes["group"].push_back(groupFileName);
}
catch(exception& e) {
}
}
otuCountsFile.close();
- outputNames.push_back(otuCountsFileName);
+ outputNames.push_back(otuCountsFileName); outputTypes["counts"].push_back(otuCountsFileName);
}
catch(exception& e) {