X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=normalizesharedcommand.cpp;h=bbf9a8335a3c2847f89b8cf136670c03a73e9725;hb=a6cf29fa4dac0909c7582cb1094151d34093ee76;hp=a4bdd47099c350e77cfe238bcf34e254d1db9341;hpb=7bf9a81bba76538ecaf351ae208de3da4bf1b6dd;p=mothur.git diff --git a/normalizesharedcommand.cpp b/normalizesharedcommand.cpp index a4bdd47..bbf9a83 100644 --- a/normalizesharedcommand.cpp +++ b/normalizesharedcommand.cpp @@ -166,7 +166,7 @@ NormalizeSharedCommand::NormalizeSharedCommand(string option) { else { pickedGroups = true; m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "totalgroup"; } @@ -176,7 +176,7 @@ NormalizeSharedCommand::NormalizeSharedCommand(string option) { if (temp == "not found") { norm = 0; //once you have read, set norm to smallest group number }else { - convert(temp, norm); + m->mothurConvert(temp, norm); if (norm < 0) { m->mothurOut("norm must be positive."); m->mothurOutEndLine(); abort=true; } } @@ -197,10 +197,6 @@ int NormalizeSharedCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } - string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + "norm.shared"; - ofstream out; - m->openOutputFile(outputFileName, out); - input = new InputData(inputfile, format); //you are reading a sharedfile and you do not want to make relabund @@ -210,18 +206,20 @@ int NormalizeSharedCommand::execute(){ //look for groups whose numseqs is below norm and remove them, warning the user if (norm != 0) { - m->Groups.clear(); + m->clearGroups(); + vector mGroups; vector temp; for (int i = 0; i < lookup.size(); i++) { if (lookup[i]->getNumSeqs() < norm) { m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine(); delete lookup[i]; }else { - m->Groups.push_back(lookup[i]->getGroup()); + mGroups.push_back(lookup[i]->getGroup()); temp.push_back(lookup[i]); } } lookup = temp; + m->setGroups(mGroups); } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. @@ -244,13 +242,12 @@ int NormalizeSharedCommand::execute(){ //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->Groups.clear(); out.close(); remove(outputFileName.c_str()); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0; } if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookup[0]->printHeaders(out); } - normalize(lookup, out); + normalize(lookup); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); @@ -262,8 +259,8 @@ int NormalizeSharedCommand::execute(){ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup = input->getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookup[0]->printHeaders(out); } - normalize(lookup, out); + + normalize(lookup); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); @@ -276,13 +273,13 @@ int NormalizeSharedCommand::execute(){ //prevent memory leak for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } - if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); out.close(); remove(outputFileName.c_str()); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->clearGroups(); return 0; } //get next line to process lookup = input->getSharedRAbundVectors(); } - if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); out.close(); remove(outputFileName.c_str()); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->clearGroups(); return 0; } //output error messages about any remaining user labels set::iterator it; @@ -303,8 +300,8 @@ int NormalizeSharedCommand::execute(){ lookup = input->getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookup[0]->printHeaders(out); } - normalize(lookup, out); + + normalize(lookup); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } @@ -319,18 +316,20 @@ int NormalizeSharedCommand::execute(){ //look for groups whose numseqs is below norm and remove them, warning the user if (norm != 0) { - m->Groups.clear(); + m->clearGroups(); + vector mGroups; vector temp; for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i]->getNumSeqs() < norm) { m->mothurOut(lookupFloat[i]->getGroup() + " contains " + toString(lookupFloat[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine(); delete lookupFloat[i]; }else { - m->Groups.push_back(lookupFloat[i]->getGroup()); + mGroups.push_back(lookupFloat[i]->getGroup()); temp.push_back(lookupFloat[i]); } } lookupFloat = temp; + m->setGroups(mGroups); } //set norm to smallest group number @@ -348,14 +347,13 @@ int NormalizeSharedCommand::execute(){ //as long as you are not at the end of the file or done wih the lines you want while((lookupFloat[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } m->Groups.clear(); out.close(); remove(outputFileName.c_str()); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } m->clearGroups(); return 0; } if(allLines == 1 || labels.count(lookupFloat[0]->getLabel()) == 1){ m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookupFloat[0]->printHeaders(out); } - normalize(lookupFloat, out); + normalize(lookupFloat); processedLabels.insert(lookupFloat[0]->getLabel()); userLabels.erase(lookupFloat[0]->getLabel()); @@ -368,8 +366,8 @@ int NormalizeSharedCommand::execute(){ lookupFloat = input->getSharedRAbundFloatVectors(lastLabel); m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookupFloat[0]->printHeaders(out); } - normalize(lookupFloat, out); + + normalize(lookupFloat); processedLabels.insert(lookupFloat[0]->getLabel()); userLabels.erase(lookupFloat[0]->getLabel()); @@ -382,13 +380,13 @@ int NormalizeSharedCommand::execute(){ //prevent memory leak for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; lookupFloat[i] = NULL; } - if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); out.close(); remove(outputFileName.c_str()); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->clearGroups(); return 0; } //get next line to process lookupFloat = input->getSharedRAbundFloatVectors(); } - if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); out.close(); remove(outputFileName.c_str()); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->clearGroups(); return 0; } //output error messages about any remaining user labels set::iterator it; @@ -410,23 +408,22 @@ int NormalizeSharedCommand::execute(){ m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookupFloat[0]->printHeaders(out); } - normalize(lookupFloat, out); + normalize(lookupFloat); for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } } //reset groups parameter - m->Groups.clear(); + m->clearGroups(); delete input; - out.close(); - if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); return 0;} + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;} m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName); + //m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set shared file as new current sharedfile @@ -445,21 +442,33 @@ int NormalizeSharedCommand::execute(){ } //********************************************************************************************************************** -int NormalizeSharedCommand::normalize(vector& thisLookUp, ofstream& out){ +int NormalizeSharedCommand::normalize(vector& thisLookUp){ try { + //save mothurOut's binLabels to restore for next label + vector saveBinLabels = m->currentBinLabels; + if (pickedGroups) { eliminateZeroOTUS(thisLookUp); } + + string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + thisLookUp[0]->getLabel() + ".norm.shared"; + ofstream out; + m->openOutputFile(outputFileName, out); + outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName); if (method == "totalgroup") { + + //save numSeqs since they will change as the data is normalized + vector sizes; + for (int i = 0; i < thisLookUp.size(); i++) { sizes.push_back(thisLookUp[i]->getNumSeqs()); } for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) { for (int i = 0; i < thisLookUp.size(); i++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { out.close(); return 0; } int abund = thisLookUp[i]->getAbundance(j); - float relabund = abund / (float) thisLookUp[i]->getNumSeqs(); + float relabund = abund / (float) sizes[i]; float newNorm = relabund * norm; //round to nearest int @@ -473,7 +482,7 @@ int NormalizeSharedCommand::normalize(vector& thisLookUp, o for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { out.close(); return 0; } //calc mean float mean = 0.0; @@ -504,12 +513,18 @@ int NormalizeSharedCommand::normalize(vector& thisLookUp, o eliminateZeroOTUS(thisLookUp); + + thisLookUp[0]->printHeaders(out); for (int i = 0; i < thisLookUp.size(); i++) { out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t'; thisLookUp[i]->print(out); } + out.close(); + + m->currentBinLabels = saveBinLabels; + return 0; } catch(exception& e) { @@ -519,21 +534,35 @@ int NormalizeSharedCommand::normalize(vector& thisLookUp, o } //********************************************************************************************************************** -int NormalizeSharedCommand::normalize(vector& thisLookUp, ofstream& out){ +int NormalizeSharedCommand::normalize(vector& thisLookUp){ try { + + //save mothurOut's binLabels to restore for next label + vector saveBinLabels = m->currentBinLabels; + + string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + thisLookUp[0]->getLabel() + ".norm.shared"; + ofstream out; + m->openOutputFile(outputFileName, out); + outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName); + + if (pickedGroups) { eliminateZeroOTUS(thisLookUp); } if (method == "totalgroup") { + //save numSeqs since they will change as the data is normalized + vector sizes; + for (int i = 0; i < thisLookUp.size(); i++) { sizes.push_back(thisLookUp[i]->getNumSeqs()); } + for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) { for (int i = 0; i < thisLookUp.size(); i++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { out.close(); return 0; } float abund = thisLookUp[i]->getAbundance(j); - float relabund = abund / (float) thisLookUp[i]->getNumSeqs(); + float relabund = abund / (float) sizes[i]; float newNorm = relabund * norm; thisLookUp[i]->set(j, newNorm, thisLookUp[i]->getGroup()); @@ -543,7 +572,7 @@ int NormalizeSharedCommand::normalize(vector& thisLook }else if (method == "zscore") { for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { out.close(); return 0; } //calc mean float mean = 0.0; @@ -571,11 +600,17 @@ int NormalizeSharedCommand::normalize(vector& thisLook eliminateZeroOTUS(thisLookUp); + thisLookUp[0]->printHeaders(out); + for (int i = 0; i < thisLookUp.size(); i++) { out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t'; thisLookUp[i]->print(out); } + out.close(); + + m->currentBinLabels = saveBinLabels; + return 0; } catch(exception& e) { @@ -596,6 +631,8 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector& thisl } //for each bin + vector newBinLabels; + string snumBins = toString(thislookup[0]->getNumBins()); for (int i = 0; i < thislookup[0]->getNumBins(); i++) { if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } @@ -610,12 +647,24 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector& thisl for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); } + //if there is a bin label use it otherwise make one + string binLabel = "Otu"; + string sbinNumber = toString(i+1); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { binLabel += "0"; } + } + binLabel += sbinNumber; + if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + + newBinLabels.push_back(binLabel); } } for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } thislookup = newLookup; + m->currentBinLabels = newBinLabels; return 0; @@ -638,6 +687,8 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector& } //for each bin + vector newBinLabels; + string snumBins = toString(thislookup[0]->getNumBins()); for (int i = 0; i < thislookup[0]->getNumBins(); i++) { if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } @@ -652,12 +703,24 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector& for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); } + //if there is a bin label use it otherwise make one + string binLabel = "Otu"; + string sbinNumber = toString(i+1); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { binLabel += "0"; } + } + binLabel += sbinNumber; + if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + + newBinLabels.push_back(binLabel); } } for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } thislookup = newLookup; + m->currentBinLabels = newBinLabels; return 0;