From 1bf53bca7e26bf091588bc8ca6e68cbfae1df6fe Mon Sep 17 00:00:00 2001 From: westcott Date: Mon, 18 Jul 2011 18:27:18 +0000 Subject: [PATCH] changed normalize.shared and sub.sample commands to fix bug in normalize.shared and make otu labels persist. --- corraxescommand.cpp | 8 +++ engine.cpp | 8 +++ inputdata.cpp | 4 +- metastatscommand.cpp | 24 ++++----- mothurmetastats.cpp | 4 +- mothurout.h | 2 + normalizesharedcommand.cpp | 99 +++++++++++++++++++++++++------------ normalizesharedcommand.h | 4 +- sharedcommand.cpp | 1 + sharedordervector.h | 2 +- sharedrabundfloatvector.cpp | 47 +++++++++++++++++- sharedrabundvector.cpp | 52 +++++++++++++++++-- sharedrabundvector.h | 2 + subsamplecommand.cpp | 55 +++++++++++++-------- subsamplecommand.h | 2 +- trimseqscommand.cpp | 98 ++++++++++++++++++++++++------------ trimseqscommand.h | 4 +- 17 files changed, 303 insertions(+), 113 deletions(-) diff --git a/corraxescommand.cpp b/corraxescommand.cpp index 20bdcbd..07c5707 100644 --- a/corraxescommand.cpp +++ b/corraxescommand.cpp @@ -759,6 +759,7 @@ int CorrAxesCommand::eliminateZeroOTUS(vector& thisloo } //for each bin + vector newBinLabels; for (int i = 0; i < thislookup[0]->getNumBins(); i++) { if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } @@ -773,12 +774,19 @@ int CorrAxesCommand::eliminateZeroOTUS(vector& thisloo for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); } + + //if there is a bin label use it otherwise make one + string binLabel = "Otu" + (i+1); + if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + + newBinLabels.push_back(binLabel); } } for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } thislookup = newLookup; + m->currentBinLabels = newBinLabels; return 0; diff --git a/engine.cpp b/engine.cpp index 3108f37..af88e4f 100644 --- a/engine.cpp +++ b/engine.cpp @@ -186,6 +186,8 @@ bool InteractEngine::getInput(){ mout->names.clear(); mout->saveNextLabel = ""; mout->printedHeaders = false; + mout->currentBinLabels.clear(); + mout->binLabelsInFile.clear(); Command* command = cFactory->getCommand(commandName, options); quitCommandCalled = command->execute(); @@ -368,6 +370,9 @@ bool BatchEngine::getInput(){ mout->names.clear(); mout->saveNextLabel = ""; mout->printedHeaders = false; + mout->currentBinLabels.clear(); + mout->binLabelsInFile.clear(); + Command* command = cFactory->getCommand(commandName, options); quitCommandCalled = command->execute(); @@ -532,6 +537,9 @@ bool ScriptEngine::getInput(){ mout->names.clear(); mout->saveNextLabel = ""; mout->printedHeaders = false; + mout->currentBinLabels.clear(); + mout->binLabelsInFile.clear(); + Command* command = cFactory->getCommand(commandName, options); quitCommandCalled = command->execute(); diff --git a/inputdata.cpp b/inputdata.cpp index ac215ca..8b1829a 100644 --- a/inputdata.cpp +++ b/inputdata.cpp @@ -19,15 +19,12 @@ InputData::InputData(string fName, string f) : format(f){ m->openInputFile(fName, fileHandle); filename = fName; m->saveNextLabel = ""; - - } /***********************************************************************/ InputData::~InputData(){ fileHandle.close(); m->saveNextLabel = ""; - } /***********************************************************************/ @@ -51,6 +48,7 @@ InputData::InputData(string fName, string orderFileName, string f) : format(f){ m->openInputFile(fName, fileHandle); m->saveNextLabel = ""; + } catch(exception& e) { m->errorOut(e, "InputData", "InputData"); diff --git a/metastatscommand.cpp b/metastatscommand.cpp index 2a55ff0..7942bfc 100644 --- a/metastatscommand.cpp +++ b/metastatscommand.cpp @@ -397,13 +397,13 @@ int MetaStatsCommand::driver(int start, int num, vector& th //get filename string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats"; outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName); - int nameLength = outputFileName.length(); - char * output = new char[nameLength]; - strcpy(output, outputFileName.c_str()); + //int nameLength = outputFileName.length(); + //char * output = new char[nameLength]; + //strcpy(output, outputFileName.c_str()); //build matrix from shared rabunds - double** data; - data = new double*[thisLookUp[0]->getNumBins()]; + //double** data; + //data = new double*[thisLookUp[0]->getNumBins()]; vector< vector > data2; data2.resize(thisLookUp[0]->getNumBins()); @@ -430,29 +430,29 @@ int MetaStatsCommand::driver(int start, int num, vector& th }else { //fill data for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) { - data[j] = new double[subset.size()]; + //data[j] = new double[subset.size()]; data2[j].resize(subset.size(), 0.0); for (int i = 0; i < subset.size(); i++) { - data[j][i] = (subset[i]->getAbundance(j)); + //data[j][i] = (subset[i]->getAbundance(j)); data2[j][i] = (subset[i]->getAbundance(j)); } } m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); - metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount); + //metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount); m->mothurOutEndLine(); MothurMetastats mothurMeta(threshold, iters); - mothurMeta.runMetastats(outputFileName+".myVersion" , data2, setACount); + mothurMeta.runMetastats(outputFileName , data2, setACount); m->mothurOutEndLine(); m->mothurOutEndLine(); } //free memory - delete output; - for(int i = 0; i < thisLookUp[0]->getNumBins(); i++) { delete[] data[i]; } - delete[] data; + //delete output; + //for(int i = 0; i < thisLookUp[0]->getNumBins(); i++) { delete[] data[i]; } + //delete[] data; } return 0; diff --git a/mothurmetastats.cpp b/mothurmetastats.cpp index ed0aaef..c376218 100644 --- a/mothurmetastats.cpp +++ b/mothurmetastats.cpp @@ -452,8 +452,8 @@ int MothurMetastats::calc_twosample_ts(vector& Pmatrix, int secondGroupi for (int i = 0; i < C1.size(); i++) { C1[i].resize(3, 0.0); } vector< vector > C2; C2.resize(row); for (int i = 0; i < C2.size(); i++) { C2[i].resize(3, 0.0); } - vector storage; storage.resize(row, 0.0); - vector tool; tool.resize(row, 0.0); + vector storage; storage.resize(a, 0.0); + vector tool; tool.resize(a, 0.0); double xbardiff = 0.0; double denom = 0.0; meanvar(Pmatrix, secondGroupingStart, storage); diff --git a/mothurout.h b/mothurout.h index 6cdf493..47e5ca4 100644 --- a/mothurout.h +++ b/mothurout.h @@ -40,6 +40,8 @@ class MothurOut { vector Treenames; map names; vector namesOfGroups; + vector binLabelsInFile; + vector currentBinLabels; string saveNextLabel, argv, sharedHeaderMode; bool printedHeaders; diff --git a/normalizesharedcommand.cpp b/normalizesharedcommand.cpp index 10c7a30..1066ab7 100644 --- a/normalizesharedcommand.cpp +++ b/normalizesharedcommand.cpp @@ -197,10 +197,6 @@ int NormalizeSharedCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } - string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + "norm.shared"; - ofstream out; - m->openOutputFile(outputFileName, out); - input = new InputData(inputfile, format); //you are reading a sharedfile and you do not want to make relabund @@ -244,13 +240,12 @@ int NormalizeSharedCommand::execute(){ //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->Groups.clear(); out.close(); m->mothurRemove(outputFileName); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->Groups.clear(); return 0; } if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookup[0]->printHeaders(out); } - normalize(lookup, out); + normalize(lookup); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); @@ -262,8 +257,8 @@ int NormalizeSharedCommand::execute(){ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup = input->getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookup[0]->printHeaders(out); } - normalize(lookup, out); + + normalize(lookup); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); @@ -276,13 +271,13 @@ int NormalizeSharedCommand::execute(){ //prevent memory leak for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } - if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); out.close(); m->mothurRemove(outputFileName); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->Groups.clear(); return 0; } //get next line to process lookup = input->getSharedRAbundVectors(); } - if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); out.close(); m->mothurRemove(outputFileName); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->Groups.clear(); return 0; } //output error messages about any remaining user labels set::iterator it; @@ -303,8 +298,8 @@ int NormalizeSharedCommand::execute(){ lookup = input->getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookup[0]->printHeaders(out); } - normalize(lookup, out); + + normalize(lookup); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } @@ -348,14 +343,13 @@ int NormalizeSharedCommand::execute(){ //as long as you are not at the end of the file or done wih the lines you want while((lookupFloat[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } m->Groups.clear(); out.close(); m->mothurRemove(outputFileName); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } m->Groups.clear(); return 0; } if(allLines == 1 || labels.count(lookupFloat[0]->getLabel()) == 1){ m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookupFloat[0]->printHeaders(out); } - normalize(lookupFloat, out); + normalize(lookupFloat); processedLabels.insert(lookupFloat[0]->getLabel()); userLabels.erase(lookupFloat[0]->getLabel()); @@ -368,8 +362,8 @@ int NormalizeSharedCommand::execute(){ lookupFloat = input->getSharedRAbundFloatVectors(lastLabel); m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookupFloat[0]->printHeaders(out); } - normalize(lookupFloat, out); + + normalize(lookupFloat); processedLabels.insert(lookupFloat[0]->getLabel()); userLabels.erase(lookupFloat[0]->getLabel()); @@ -382,13 +376,13 @@ int NormalizeSharedCommand::execute(){ //prevent memory leak for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; lookupFloat[i] = NULL; } - if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); out.close(); m->mothurRemove(outputFileName); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->Groups.clear(); return 0; } //get next line to process lookupFloat = input->getSharedRAbundFloatVectors(); } - if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); out.close(); m->mothurRemove(outputFileName); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->Groups.clear(); return 0; } //output error messages about any remaining user labels set::iterator it; @@ -410,8 +404,7 @@ int NormalizeSharedCommand::execute(){ m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookupFloat[0]->printHeaders(out); } - normalize(lookupFloat, out); + normalize(lookupFloat); for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } @@ -420,13 +413,13 @@ int NormalizeSharedCommand::execute(){ //reset groups parameter m->Groups.clear(); delete input; - out.close(); - if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); return 0;} + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;} m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName); + //m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set shared file as new current sharedfile @@ -445,9 +438,17 @@ int NormalizeSharedCommand::execute(){ } //********************************************************************************************************************** -int NormalizeSharedCommand::normalize(vector& thisLookUp, ofstream& out){ +int NormalizeSharedCommand::normalize(vector& thisLookUp){ try { + //save mothurOut's binLabels to restore for next label + vector saveBinLabels = m->currentBinLabels; + if (pickedGroups) { eliminateZeroOTUS(thisLookUp); } + + string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + thisLookUp[0]->getLabel() + ".norm.shared"; + ofstream out; + m->openOutputFile(outputFileName, out); + outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName); if (method == "totalgroup") { @@ -459,7 +460,7 @@ int NormalizeSharedCommand::normalize(vector& thisLookUp, o for (int i = 0; i < thisLookUp.size(); i++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { out.close(); return 0; } int abund = thisLookUp[i]->getAbundance(j); @@ -477,7 +478,7 @@ int NormalizeSharedCommand::normalize(vector& thisLookUp, o for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { out.close(); return 0; } //calc mean float mean = 0.0; @@ -508,12 +509,18 @@ int NormalizeSharedCommand::normalize(vector& thisLookUp, o eliminateZeroOTUS(thisLookUp); + + thisLookUp[0]->printHeaders(out); for (int i = 0; i < thisLookUp.size(); i++) { out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t'; thisLookUp[i]->print(out); } + out.close(); + + m->currentBinLabels = saveBinLabels; + return 0; } catch(exception& e) { @@ -523,8 +530,18 @@ int NormalizeSharedCommand::normalize(vector& thisLookUp, o } //********************************************************************************************************************** -int NormalizeSharedCommand::normalize(vector& thisLookUp, ofstream& out){ +int NormalizeSharedCommand::normalize(vector& thisLookUp){ try { + + //save mothurOut's binLabels to restore for next label + vector saveBinLabels = m->currentBinLabels; + + string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + thisLookUp[0]->getLabel() + ".norm.shared"; + ofstream out; + m->openOutputFile(outputFileName, out); + outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName); + + if (pickedGroups) { eliminateZeroOTUS(thisLookUp); } if (method == "totalgroup") { @@ -537,7 +554,7 @@ int NormalizeSharedCommand::normalize(vector& thisLook for (int i = 0; i < thisLookUp.size(); i++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { out.close(); return 0; } float abund = thisLookUp[i]->getAbundance(j); @@ -551,7 +568,7 @@ int NormalizeSharedCommand::normalize(vector& thisLook }else if (method == "zscore") { for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { out.close(); return 0; } //calc mean float mean = 0.0; @@ -579,11 +596,17 @@ int NormalizeSharedCommand::normalize(vector& thisLook eliminateZeroOTUS(thisLookUp); + thisLookUp[0]->printHeaders(out); + for (int i = 0; i < thisLookUp.size(); i++) { out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t'; thisLookUp[i]->print(out); } + out.close(); + + m->currentBinLabels = saveBinLabels; + return 0; } catch(exception& e) { @@ -604,6 +627,7 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector& thisl } //for each bin + vector newBinLabels; for (int i = 0; i < thislookup[0]->getNumBins(); i++) { if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } @@ -618,12 +642,18 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector& thisl for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); } + //if there is a bin label use it otherwise make one + string binLabel = "Otu" + (i+1); + if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + + newBinLabels.push_back(binLabel); } } for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } thislookup = newLookup; + m->currentBinLabels = newBinLabels; return 0; @@ -646,6 +676,7 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector& } //for each bin + vector newBinLabels; for (int i = 0; i < thislookup[0]->getNumBins(); i++) { if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } @@ -660,12 +691,18 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector& for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); } + //if there is a bin label use it otherwise make one + string binLabel = "Otu" + (i+1); + if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + + newBinLabels.push_back(binLabel); } } for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } thislookup = newLookup; + m->currentBinLabels = newBinLabels; return 0; diff --git a/normalizesharedcommand.h b/normalizesharedcommand.h index c252741..4b6f7fd 100644 --- a/normalizesharedcommand.h +++ b/normalizesharedcommand.h @@ -44,8 +44,8 @@ private: int norm; vector Groups, outputNames; - int normalize(vector&, ofstream&); - int normalize(vector&, ofstream&); + int normalize(vector&); + int normalize(vector&); int eliminateZeroOTUS(vector&); int eliminateZeroOTUS(vector&); diff --git a/sharedcommand.cpp b/sharedcommand.cpp index 040fad8..2928214 100644 --- a/sharedcommand.cpp +++ b/sharedcommand.cpp @@ -531,6 +531,7 @@ int SharedCommand::eliminateZeroOTUS(vector& thislookup) { for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); } + //if there is a bin label use it otherwise make one } //else{ cout << "bin # " << i << " is all zeros" << endl; } } diff --git a/sharedordervector.h b/sharedordervector.h index 09d4959..7d383ac 100644 --- a/sharedordervector.h +++ b/sharedordervector.h @@ -24,7 +24,7 @@ struct individual { bool operator()(const individual& i1, const individual& i2) { return (i1.abundance > i2.abundance); } - individual() { group = ""; bin = 0; abundance = 0; } + individual() { group = ""; bin = 0; abundance = 0; } }; struct individualFloat { diff --git a/sharedrabundfloatvector.cpp b/sharedrabundfloatvector.cpp index e7ab225..8d91d7e 100644 --- a/sharedrabundfloatvector.cpp +++ b/sharedrabundfloatvector.cpp @@ -50,12 +50,33 @@ SharedRAbundFloatVector::SharedRAbundFloatVector(ifstream& f) : DataVector(), ma //is this a shared file that has headers if (label == "label") { + //gets "group" + f >> label; m->gobble(f); + + //gets "numOtus" + f >> label; m->gobble(f); + //eat rest of line label = m->getline(f); m->gobble(f); + + //parse labels to save + istringstream iStringStream(label); + m->binLabelsInFile.clear(); + while(!iStringStream.eof()){ + if (m->control_pressed) { break; } + string temp; + iStringStream >> temp; m->gobble(iStringStream); + + m->binLabelsInFile.push_back(temp); + } + f >> label; } }else { label = m->saveNextLabel; } + //reset labels, currentLabels may have gotten changed as otus were eliminated because of group choices or sampling + m->currentBinLabels = m-> binLabelsInFile; + //read in first row since you know there is at least 1 group. f >> groupN >> num; @@ -231,12 +252,27 @@ void SharedRAbundFloatVector::printHeaders(ostream& output){ try { output << "label\tGroup\tnumOtus\t"; if (m->sharedHeaderMode == "tax") { - for (int i = 0; i < numBins; i++) { output << "PhyloType" << (i+1) << '\t'; } + for (int i = 0; i < numBins; i++) { + + //if there is a bin label use it otherwise make one + string binLabel = "PhyloType" + toString(i+1); + if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + + output << binLabel << '\t'; + } output << endl; }else { - for (int i = 0; i < numBins; i++) { output << "Otu" << (i+1) << '\t'; } + for (int i = 0; i < numBins; i++) { + //if there is a bin label use it otherwise make one + string binLabel = "Otu" + toString(i+1); + if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + + output << binLabel << '\t'; + } + output << endl; } + m->printedHeaders = true; } catch(exception& e) { @@ -442,6 +478,7 @@ int SharedRAbundFloatVector::eliminateZeroOTUS(vector& } //for each bin + vector newBinLabels; for (int i = 0; i < thislookup[0]->getNumBins(); i++) { if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } @@ -456,12 +493,18 @@ int SharedRAbundFloatVector::eliminateZeroOTUS(vector& for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); } + //if there is a bin label use it otherwise make one + string binLabel = "Otu" + (i+1); + if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + + newBinLabels.push_back(binLabel); } } for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } thislookup = newLookup; + m->currentBinLabels = newBinLabels; return 0; diff --git a/sharedrabundvector.cpp b/sharedrabundvector.cpp index 8d0df60..74fb1c5 100644 --- a/sharedrabundvector.cpp +++ b/sharedrabundvector.cpp @@ -74,12 +74,33 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), //is this a shared file that has headers if (label == "label") { + //gets "group" + f >> label; m->gobble(f); + + //gets "numOtus" + f >> label; m->gobble(f); + //eat rest of line label = m->getline(f); m->gobble(f); + + //parse labels to save + istringstream iStringStream(label); + m->binLabelsInFile.clear(); + while(!iStringStream.eof()){ + if (m->control_pressed) { break; } + string temp; + iStringStream >> temp; m->gobble(iStringStream); + + m->binLabelsInFile.push_back(temp); + } + f >> label; } }else { label = m->saveNextLabel; } + //reset labels, currentLabels may have gotten changed as otus were eliminated because of group choices or sampling + m->currentBinLabels = m->binLabelsInFile; + //read in first row since you know there is at least 1 group. f >> groupN >> num; @@ -99,8 +120,7 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), lookup[0]->push_back(inputData, groupN); //abundance, bin, group push_back(inputData, groupN); - //numSeqs += inputData; - //numBins++; + if (inputData > maxRank) { maxRank = inputData; } } @@ -124,6 +144,7 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), //fill vector. for(int i=0;i> inputData; + lookup[count]->push_back(inputData, groupN); //abundance, bin, group } @@ -313,12 +334,27 @@ int SharedRAbundVector::size(){ /***********************************************************************/ void SharedRAbundVector::printHeaders(ostream& output){ try { + output << "label\tGroup\tnumOtus\t"; if (m->sharedHeaderMode == "tax") { - for (int i = 0; i < numBins; i++) { output << "PhyloType" << (i+1) << '\t'; } + for (int i = 0; i < numBins; i++) { + + //if there is a bin label use it otherwise make one + string binLabel = "PhyloType" + toString(i+1); + if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + + output << binLabel << '\t'; + } output << endl; }else { - for (int i = 0; i < numBins; i++) { output << "Otu" << (i+1) << '\t'; } + for (int i = 0; i < numBins; i++) { + //if there is a bin label use it otherwise make one + string mybinLabel = "Otu" + toString(i+1); + if (i < m->currentBinLabels.size()) { mybinLabel = m->currentBinLabels[i]; } + + output << mybinLabel << '\t'; + } + output << endl; } m->printedHeaders = true; @@ -419,6 +455,7 @@ int SharedRAbundVector::eliminateZeroOTUS(vector& thislooku } //for each bin + vector newBinLabels; for (int i = 0; i < thislookup[0]->getNumBins(); i++) { if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } @@ -433,12 +470,19 @@ int SharedRAbundVector::eliminateZeroOTUS(vector& thislooku for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); } + + //if there is a bin label use it otherwise make one + string binLabel = "Otu" + (i+1); + if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + + newBinLabels.push_back(binLabel); } } for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } thislookup = newLookup; + m->currentBinLabels = newBinLabels; return 0; diff --git a/sharedrabundvector.h b/sharedrabundvector.h index 02dc9f4..b86c884 100644 --- a/sharedrabundvector.h +++ b/sharedrabundvector.h @@ -40,6 +40,8 @@ public: int getMaxRank(); string getGroup(); void setGroup(string); + string getBinLabel(); + void setBinLabel(string); int getGroupIndex(); void setGroupIndex(int); diff --git a/subsamplecommand.cpp b/subsamplecommand.cpp index d538889..e365441 100644 --- a/subsamplecommand.cpp +++ b/subsamplecommand.cpp @@ -689,27 +689,17 @@ int SubSampleCommand::getSubSampleShared() { if (lookup.size() == 0) { m->mothurOut("The size you selected is too large, skipping shared file."); m->mothurOutEndLine(); delete input; return 0; } - string thisOutputDir = outputDir; - if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + "subsample" + m->getExtension(sharedfile); - - ofstream out; - m->openOutputFile(outputFileName, out); - outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName); - - m->mothurOut("Sampling " + toString(size) + " from each group."); m->mothurOutEndLine(); //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { delete input; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } out.close(); return 0; } + if (m->control_pressed) { delete input; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } return 0; } if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookup[0]->printHeaders(out); } - processShared(lookup, out); + processShared(lookup); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); @@ -723,8 +713,7 @@ int SubSampleCommand::getSubSampleShared() { lookup = input->getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookup[0]->printHeaders(out); } - processShared(lookup, out); + processShared(lookup); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); @@ -742,7 +731,7 @@ int SubSampleCommand::getSubSampleShared() { } - if (m->control_pressed) { out.close(); return 0; } + if (m->control_pressed) { return 0; } //output error messages about any remaining user labels set::iterator it; @@ -764,14 +753,12 @@ int SubSampleCommand::getSubSampleShared() { m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookup[0]->printHeaders(out); } - processShared(lookup, out); + processShared(lookup); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } delete input; - out.close(); return 0; @@ -782,9 +769,21 @@ int SubSampleCommand::getSubSampleShared() { } } //********************************************************************************************************************** -int SubSampleCommand::processShared(vector& thislookup, ofstream& out) { +int SubSampleCommand::processShared(vector& thislookup) { try { + //save mothurOut's binLabels to restore for next label + vector saveBinLabels = m->currentBinLabels; + + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); } + string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + thislookup[0]->getLabel() + ".subsample" + m->getExtension(sharedfile); + + + ofstream out; + m->openOutputFile(outputFileName, out); + outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName); + int numBins = thislookup[0]->getNumBins(); for (int i = 0; i < thislookup.size(); i++) { int thisSize = thislookup[i]->getNumSeqs(); @@ -811,7 +810,7 @@ int SubSampleCommand::processShared(vector& thislookup, ofs for (int j = 0; j < size; j++) { - if (m->control_pressed) { delete order; return 0; } + if (m->control_pressed) { delete order; out.close(); return 0; } //get random number to sample from order between 0 and thisSize-1. int myrand = int((float)(thisSize) * (float)(rand()) / ((float)RAND_MAX+1.0)); @@ -828,13 +827,20 @@ int SubSampleCommand::processShared(vector& thislookup, ofs //subsampling may have created some otus with no sequences in them eliminateZeroOTUS(thislookup); - if (m->control_pressed) { return 0; } + if (m->control_pressed) { out.close(); return 0; } + + thislookup[0]->printHeaders(out); for (int i = 0; i < thislookup.size(); i++) { out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t'; thislookup[i]->print(out); } + out.close(); + + //save mothurOut's binLabels to restore for next label + m->currentBinLabels = saveBinLabels; + return 0; } @@ -1517,6 +1523,7 @@ int SubSampleCommand::eliminateZeroOTUS(vector& thislookup) } //for each bin + vector newBinLabels; for (int i = 0; i < thislookup[0]->getNumBins(); i++) { if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } @@ -1531,6 +1538,11 @@ int SubSampleCommand::eliminateZeroOTUS(vector& thislookup) for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); } + //if there is a bin label use it otherwise make one + string binLabel = "Otu" + (i+1); + if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + + newBinLabels.push_back(binLabel); } } @@ -1538,6 +1550,7 @@ int SubSampleCommand::eliminateZeroOTUS(vector& thislookup) thislookup.clear(); thislookup = newLookup; + m->currentBinLabels = newBinLabels; return 0; diff --git a/subsamplecommand.h b/subsamplecommand.h index 39d3c37..4be3570 100644 --- a/subsamplecommand.h +++ b/subsamplecommand.h @@ -51,7 +51,7 @@ private: int getSubSampleRabund(); int getSubSampleSabund(); int getSubSampleFasta(); - int processShared(vector&, ofstream&); + int processShared(vector&); int processRabund(RAbundVector*&, ofstream&); int processSabund(SAbundVector*&, ofstream&); int processList(ListVector*&, ofstream&, set&); diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 21de6ac..a8c3fc6 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -308,6 +308,7 @@ int TrimSeqsCommand::execute(){ numFPrimers = 0; //this needs to be initialized numRPrimers = 0; + createGroup = false; vector > fastaFileNames; vector > qualFileNames; vector > nameFileNames; @@ -343,9 +344,11 @@ int TrimSeqsCommand::execute(){ string outputGroupFileName; if(oligoFile != ""){ - outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; - outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); - getOligos(fastaFileNames, qualFileNames, nameFileNames); + createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames); + if (createGroup) { + outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; + outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); + } } vector fastaFilePos; @@ -505,7 +508,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string ofstream outGroupsFile; - if (oligoFile != ""){ m->openOutputFile(groupFileName, outGroupsFile); } + if (createGroup){ m->openOutputFile(groupFileName, outGroupsFile); } if(allFiles){ for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file @@ -541,7 +544,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if (m->control_pressed) { inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close(); - if (oligoFile != "") { outGroupsFile.close(); } + if (createGroup) { outGroupsFile.close(); } if(qFileName != ""){ qFile.close(); @@ -646,30 +649,39 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } } - if(barcodes.size() != 0){ - string thisGroup = barcodeNameVector[barcodeIndex]; - if (primers.size() != 0) { if (primerNameVector[primerIndex] != "") { thisGroup += "." + primerNameVector[primerIndex]; } } - - outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; - - if (nameFile != "") { - map::iterator itName = nameMap.find(currSeq.getName()); - if (itName != nameMap.end()) { - vector thisSeqsNames; - m->splitAtChar(itName->second, thisSeqsNames, ','); - for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self - outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl; - } - }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } - } - - map::iterator it = groupCounts.find(thisGroup); - if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; } - else { groupCounts[it->first]++; } + if (createGroup) { + if(barcodes.size() != 0){ + string thisGroup = barcodeNameVector[barcodeIndex]; + if (primers.size() != 0) { + if (primerNameVector[primerIndex] != "") { + if(thisGroup != "") { + thisGroup += "." + primerNameVector[primerIndex]; + }else { + thisGroup = primerNameVector[primerIndex]; + } + } + } + outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; + + if (nameFile != "") { + map::iterator itName = nameMap.find(currSeq.getName()); + if (itName != nameMap.end()) { + vector thisSeqsNames; + m->splitAtChar(itName->second, thisSeqsNames, ','); + for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self + outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl; + } + }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } + } + + map::iterator it = groupCounts.find(thisGroup); + if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; } + else { groupCounts[it->first]++; } + + } } - if(allFiles){ ofstream output; m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output); @@ -728,7 +740,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close(); - if (oligoFile != "") { outGroupsFile.close(); } + if (createGroup) { outGroupsFile.close(); } if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); } if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); } @@ -800,7 +812,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName qLines[process]); //pass groupCounts to parent - if(oligoFile != ""){ + if(createGroup){ ofstream out; string tempFile = filename + toString(getpid()) + ".num.temp"; m->openOutputFile(tempFile, out); @@ -865,7 +877,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp")); } - if(oligoFile != ""){ + if(createGroup){ m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile); m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp")); } @@ -892,7 +904,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName } } - if(oligoFile != ""){ + if(createGroup){ ifstream in; string tempFile = filename + toString(processIDS[i]) + ".num.temp"; m->openInputFile(tempFile, in); @@ -1044,7 +1056,7 @@ int TrimSeqsCommand::setLines(string filename, string qfilename, vector >& fastaFileNames, vector >& qualFileNames, vector >& nameFileNames){ +bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector >& qualFileNames, vector >& nameFileNames){ try { ifstream inOligos; m->openInputFile(oligoFile, inOligos); @@ -1199,7 +1211,29 @@ void TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< } numFPrimers = primers.size(); numRPrimers = revPrimer.size(); - + + bool allBlank = true; + for (int i = 0; i < barcodeNameVector.size(); i++) { + if (barcodeNameVector[i] != "") { + allBlank = false; + break; + } + } + for (int i = 0; i < primerNameVector.size(); i++) { + if (primerNameVector[i] != "") { + allBlank = false; + break; + } + } + + if (allBlank) { + m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine(); + allFiles = false; + return false; + } + + return true; + } catch(exception& e) { m->errorOut(e, "TrimSeqsCommand", "getOligos"); diff --git a/trimseqscommand.h b/trimseqscommand.h index 02a88e8..13d1a41 100644 --- a/trimseqscommand.h +++ b/trimseqscommand.h @@ -42,7 +42,7 @@ private: linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {} }; - void getOligos(vector >&, vector >&, vector >&); + bool getOligos(vector >&, vector >&, vector >&); int stripBarcode(Sequence&, QualityScores&, int&); int stripForward(Sequence&, QualityScores&, int&); bool stripReverse(Sequence&, QualityScores&); @@ -56,7 +56,7 @@ private: bool compareDNASeq(string, string); int countDiffs(string, string); - bool abort; + bool abort, createGroup; string fastaFile, oligoFile, qFileName, groupfile, nameFile, outputDir; bool flip, allFiles, qtrim; -- 2.39.2