X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=normalizesharedcommand.cpp;h=acd8208ea7ebb20a3d3810d254d0b24f5386d4d3;hp=5f3cacbf194a36205b35f9772aac17cdae4352c2;hb=615301e57c25e241356a9c2380648d117709458d;hpb=1d898dc6edaf9e9f287fab53bf1f21fb29757a17 diff --git a/normalizesharedcommand.cpp b/normalizesharedcommand.cpp index 5f3cacb..acd8208 100644 --- a/normalizesharedcommand.cpp +++ b/normalizesharedcommand.cpp @@ -12,15 +12,15 @@ //********************************************************************************************************************** vector NormalizeSharedCommand::setParameters(){ try { - CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared); - CommandParameter prelabund("relabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(prelabund); - CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups); - CommandParameter pmethod("method", "Multiple", "totalgroup-zscore", "totalgroup", "", "", "",false,false); parameters.push_back(pmethod); - CommandParameter pnorm("norm", "Number", "", "0", "", "", "",false,false); parameters.push_back(pnorm); - CommandParameter pmakerelabund("makerelabund", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pmakerelabund); - CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","shared",false,false,true); parameters.push_back(pshared); + CommandParameter prelabund("relabund", "InputTypes", "", "", "LRSS", "LRSS", "none","shared",false,false,true); parameters.push_back(prelabund); + CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); + CommandParameter pmethod("method", "Multiple", "totalgroup-zscore", "totalgroup", "", "", "","",false,false,true); parameters.push_back(pmethod); + CommandParameter pnorm("norm", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pnorm); + CommandParameter pmakerelabund("makerelabund", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pmakerelabund); + CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -53,8 +53,21 @@ string NormalizeSharedCommand::getHelpString(){ exit(1); } } - - +//********************************************************************************************************************** +string NormalizeSharedCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "shared") { pattern = "[filename],[distance],norm.shared"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "NormalizeSharedCommand", "getOutputPattern"); + exit(1); + } +} //********************************************************************************************************************** NormalizeSharedCommand::NormalizeSharedCommand(){ try { @@ -77,6 +90,7 @@ NormalizeSharedCommand::NormalizeSharedCommand(string option) { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} else { vector myArray = setParameters(); @@ -121,12 +135,12 @@ NormalizeSharedCommand::NormalizeSharedCommand(string option) { sharedfile = validParameter.validFile(parameters, "shared", true); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } - else { format = "sharedfile"; inputfile = sharedfile; } + else { format = "sharedfile"; inputfile = sharedfile; m->setSharedFile(sharedfile); } relabundfile = validParameter.validFile(parameters, "relabund", true); if (relabundfile == "not open") { relabundfile = ""; abort = true; } else if (relabundfile == "not found") { relabundfile = ""; } - else { format = "relabund"; inputfile = relabundfile; } + else { format = "relabund"; inputfile = relabundfile; m->setRelAbundFile(relabundfile); } if ((sharedfile == "") && (relabundfile == "")) { @@ -165,7 +179,7 @@ NormalizeSharedCommand::NormalizeSharedCommand(string option) { else { pickedGroups = true; m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "totalgroup"; } @@ -175,7 +189,7 @@ NormalizeSharedCommand::NormalizeSharedCommand(string option) { if (temp == "not found") { norm = 0; //once you have read, set norm to smallest group number }else { - convert(temp, norm); + m->mothurConvert(temp, norm); if (norm < 0) { m->mothurOut("norm must be positive."); m->mothurOutEndLine(); abort=true; } } @@ -196,10 +210,6 @@ int NormalizeSharedCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } - string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + "norm.shared"; - ofstream out; - m->openOutputFile(outputFileName, out); - input = new InputData(inputfile, format); //you are reading a sharedfile and you do not want to make relabund @@ -209,18 +219,20 @@ int NormalizeSharedCommand::execute(){ //look for groups whose numseqs is below norm and remove them, warning the user if (norm != 0) { - m->Groups.clear(); + m->clearGroups(); + vector mGroups; vector temp; for (int i = 0; i < lookup.size(); i++) { if (lookup[i]->getNumSeqs() < norm) { m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine(); delete lookup[i]; }else { - m->Groups.push_back(lookup[i]->getGroup()); + mGroups.push_back(lookup[i]->getGroup()); temp.push_back(lookup[i]); } } lookup = temp; + m->setGroups(mGroups); } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. @@ -239,15 +251,16 @@ int NormalizeSharedCommand::execute(){ m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine(); } + //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->Groups.clear(); out.close(); remove(outputFileName.c_str()); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0; } if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - normalize(lookup, out); + normalize(lookup); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); @@ -260,7 +273,7 @@ int NormalizeSharedCommand::execute(){ lookup = input->getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - normalize(lookup, out); + normalize(lookup); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); @@ -273,13 +286,13 @@ int NormalizeSharedCommand::execute(){ //prevent memory leak for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } - if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); out.close(); remove(outputFileName.c_str()); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->clearGroups(); return 0; } //get next line to process lookup = input->getSharedRAbundVectors(); } - if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); out.close(); remove(outputFileName.c_str()); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->clearGroups(); return 0; } //output error messages about any remaining user labels set::iterator it; @@ -301,7 +314,7 @@ int NormalizeSharedCommand::execute(){ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - normalize(lookup, out); + normalize(lookup); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } @@ -316,18 +329,20 @@ int NormalizeSharedCommand::execute(){ //look for groups whose numseqs is below norm and remove them, warning the user if (norm != 0) { - m->Groups.clear(); + m->clearGroups(); + vector mGroups; vector temp; for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i]->getNumSeqs() < norm) { m->mothurOut(lookupFloat[i]->getGroup() + " contains " + toString(lookupFloat[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine(); delete lookupFloat[i]; }else { - m->Groups.push_back(lookupFloat[i]->getGroup()); + mGroups.push_back(lookupFloat[i]->getGroup()); temp.push_back(lookupFloat[i]); } } lookupFloat = temp; + m->setGroups(mGroups); } //set norm to smallest group number @@ -345,12 +360,13 @@ int NormalizeSharedCommand::execute(){ //as long as you are not at the end of the file or done wih the lines you want while((lookupFloat[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } m->Groups.clear(); out.close(); remove(outputFileName.c_str()); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } m->clearGroups(); return 0; } if(allLines == 1 || labels.count(lookupFloat[0]->getLabel()) == 1){ m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine(); - normalize(lookupFloat, out); + + normalize(lookupFloat); processedLabels.insert(lookupFloat[0]->getLabel()); userLabels.erase(lookupFloat[0]->getLabel()); @@ -361,9 +377,10 @@ int NormalizeSharedCommand::execute(){ for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } lookupFloat = input->getSharedRAbundFloatVectors(lastLabel); - m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine(); - normalize(lookupFloat, out); + m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine(); + + normalize(lookupFloat); processedLabels.insert(lookupFloat[0]->getLabel()); userLabels.erase(lookupFloat[0]->getLabel()); @@ -376,13 +393,13 @@ int NormalizeSharedCommand::execute(){ //prevent memory leak for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; lookupFloat[i] = NULL; } - if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); out.close(); remove(outputFileName.c_str()); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->clearGroups(); return 0; } //get next line to process lookupFloat = input->getSharedRAbundFloatVectors(); } - if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); out.close(); remove(outputFileName.c_str()); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->clearGroups(); return 0; } //output error messages about any remaining user labels set::iterator it; @@ -404,22 +421,22 @@ int NormalizeSharedCommand::execute(){ m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine(); - normalize(lookupFloat, out); + normalize(lookupFloat); for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } } //reset groups parameter - m->Groups.clear(); + m->clearGroups(); delete input; - out.close(); - if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); return 0;} + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;} m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName); + //m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set shared file as new current sharedfile @@ -438,21 +455,37 @@ int NormalizeSharedCommand::execute(){ } //********************************************************************************************************************** -int NormalizeSharedCommand::normalize(vector& thisLookUp, ofstream& out){ +int NormalizeSharedCommand::normalize(vector& thisLookUp){ try { + //save mothurOut's binLabels to restore for next label + vector saveBinLabels = m->currentBinLabels; + if (pickedGroups) { eliminateZeroOTUS(thisLookUp); } + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile)); + variables["[distance]"] = thisLookUp[0]->getLabel(); + string outputFileName = getOutputFileName("shared",variables); + + ofstream out; + m->openOutputFile(outputFileName, out); + outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName); if (method == "totalgroup") { + + //save numSeqs since they will change as the data is normalized + vector sizes; + for (int i = 0; i < thisLookUp.size(); i++) { sizes.push_back(thisLookUp[i]->getNumSeqs()); } for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) { for (int i = 0; i < thisLookUp.size(); i++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { out.close(); return 0; } int abund = thisLookUp[i]->getAbundance(j); - float relabund = abund / (float) thisLookUp[i]->getNumSeqs(); + float relabund = abund / (float) sizes[i]; float newNorm = relabund * norm; //round to nearest int @@ -466,7 +499,7 @@ int NormalizeSharedCommand::normalize(vector& thisLookUp, o for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { out.close(); return 0; } //calc mean float mean = 0.0; @@ -497,12 +530,18 @@ int NormalizeSharedCommand::normalize(vector& thisLookUp, o eliminateZeroOTUS(thisLookUp); + + thisLookUp[0]->printHeaders(out); for (int i = 0; i < thisLookUp.size(); i++) { out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t'; thisLookUp[i]->print(out); } + out.close(); + + m->currentBinLabels = saveBinLabels; + return 0; } catch(exception& e) { @@ -512,21 +551,38 @@ int NormalizeSharedCommand::normalize(vector& thisLookUp, o } //********************************************************************************************************************** -int NormalizeSharedCommand::normalize(vector& thisLookUp, ofstream& out){ +int NormalizeSharedCommand::normalize(vector& thisLookUp){ try { + + //save mothurOut's binLabels to restore for next label + vector saveBinLabels = m->currentBinLabels; + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile)); + variables["[distance]"] = thisLookUp[0]->getLabel(); + string outputFileName = getOutputFileName("shared",variables); + ofstream out; + m->openOutputFile(outputFileName, out); + outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName); + + if (pickedGroups) { eliminateZeroOTUS(thisLookUp); } if (method == "totalgroup") { + //save numSeqs since they will change as the data is normalized + vector sizes; + for (int i = 0; i < thisLookUp.size(); i++) { sizes.push_back(thisLookUp[i]->getNumSeqs()); } + for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) { for (int i = 0; i < thisLookUp.size(); i++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { out.close(); return 0; } float abund = thisLookUp[i]->getAbundance(j); - float relabund = abund / (float) thisLookUp[i]->getNumSeqs(); + float relabund = abund / (float) sizes[i]; float newNorm = relabund * norm; thisLookUp[i]->set(j, newNorm, thisLookUp[i]->getGroup()); @@ -536,7 +592,7 @@ int NormalizeSharedCommand::normalize(vector& thisLook }else if (method == "zscore") { for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { out.close(); return 0; } //calc mean float mean = 0.0; @@ -564,11 +620,17 @@ int NormalizeSharedCommand::normalize(vector& thisLook eliminateZeroOTUS(thisLookUp); + thisLookUp[0]->printHeaders(out); + for (int i = 0; i < thisLookUp.size(); i++) { out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t'; thisLookUp[i]->print(out); } + out.close(); + + m->currentBinLabels = saveBinLabels; + return 0; } catch(exception& e) { @@ -589,6 +651,8 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector& thisl } //for each bin + vector newBinLabels; + string snumBins = toString(thislookup[0]->getNumBins()); for (int i = 0; i < thislookup[0]->getNumBins(); i++) { if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } @@ -603,12 +667,24 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector& thisl for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); } + //if there is a bin label use it otherwise make one + string binLabel = "Otu"; + string sbinNumber = toString(i+1); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { binLabel += "0"; } + } + binLabel += sbinNumber; + if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + + newBinLabels.push_back(binLabel); } } for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } thislookup = newLookup; + m->currentBinLabels = newBinLabels; return 0; @@ -631,6 +707,8 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector& } //for each bin + vector newBinLabels; + string snumBins = toString(thislookup[0]->getNumBins()); for (int i = 0; i < thislookup[0]->getNumBins(); i++) { if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } @@ -645,12 +723,24 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector& for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); } + //if there is a bin label use it otherwise make one + string binLabel = "Otu"; + string sbinNumber = toString(i+1); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { binLabel += "0"; } + } + binLabel += sbinNumber; + if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + + newBinLabels.push_back(binLabel); } } for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } thislookup = newLookup; + m->currentBinLabels = newBinLabels; return 0;