X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=normalizesharedcommand.cpp;h=acd8208ea7ebb20a3d3810d254d0b24f5386d4d3;hp=37cfb5daa98b4eeb850b44d60e5e9d80fa268de5;hb=1a20e24ee786195ab0e1cccd4f5aede7a88f3f4e;hpb=348de0f8b17d84ede77081dcf67bd6ef43496677 diff --git a/normalizesharedcommand.cpp b/normalizesharedcommand.cpp index 37cfb5d..acd8208 100644 --- a/normalizesharedcommand.cpp +++ b/normalizesharedcommand.cpp @@ -10,49 +10,74 @@ #include "normalizesharedcommand.h" //********************************************************************************************************************** -vector NormalizeSharedCommand::getValidParameters(){ +vector NormalizeSharedCommand::setParameters(){ try { - string Array[] = {"groups","label","method","makerelabund","outputdir","inputdir","norm"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","shared",false,false,true); parameters.push_back(pshared); + CommandParameter prelabund("relabund", "InputTypes", "", "", "LRSS", "LRSS", "none","shared",false,false,true); parameters.push_back(prelabund); + CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); + CommandParameter pmethod("method", "Multiple", "totalgroup-zscore", "totalgroup", "", "", "","",false,false,true); parameters.push_back(pmethod); + CommandParameter pnorm("norm", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pnorm); + CommandParameter pmakerelabund("makerelabund", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pmakerelabund); + CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { - m->errorOut(e, "NormalizeSharedCommand", "getValidParameters"); + m->errorOut(e, "NormalizeSharedCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** -NormalizeSharedCommand::NormalizeSharedCommand(){ +string NormalizeSharedCommand::getHelpString(){ try { - abort = true; calledHelp = true; - vector tempOutNames; - outputTypes["shared"] = tempOutNames; + string helpString = ""; + helpString += "The normalize.shared command parameters are shared, relabund, groups, method, norm, makerelabund and label. shared or relabund is required, unless you have a valid current file.\n"; + helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n"; + helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n"; + helpString += "The method parameter allows you to select what method you would like to use to normalize. The options are totalgroup and zscore. We hope to add more ways to normalize in the future, suggestions are welcome!\n"; + helpString += "The makerelabund parameter allows you to convert a shared file to a relabund file before you normalize. default=f.\n"; + helpString += "The norm parameter allows you to number you would like to normalize to. By default this is set to the number of sequences in your smallest group.\n"; + helpString += "The normalize.shared command should be in the following format: normalize.shared(groups=yourGroups, label=yourLabels).\n"; + helpString += "Example normalize.shared(groups=A-B-C, scale=totalgroup).\n"; + helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n"; + helpString += "The normalize.shared command outputs a .norm.shared file.\n"; + helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n"; + return helpString; } catch(exception& e) { - m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand"); + m->errorOut(e, "NormalizeSharedCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** -vector NormalizeSharedCommand::getRequiredParameters(){ - try { - vector myArray; - return myArray; - } - catch(exception& e) { - m->errorOut(e, "NormalizeSharedCommand", "getRequiredParameters"); - exit(1); - } +string NormalizeSharedCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "shared") { pattern = "[filename],[distance],norm.shared"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "NormalizeSharedCommand", "getOutputPattern"); + exit(1); + } } //********************************************************************************************************************** -vector NormalizeSharedCommand::getRequiredFiles(){ +NormalizeSharedCommand::NormalizeSharedCommand(){ try { - string Array[] = {"shared"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); - return myArray; + abort = true; calledHelp = true; + setParameters(); + vector tempOutNames; + outputTypes["shared"] = tempOutNames; } catch(exception& e) { - m->errorOut(e, "NormalizeSharedCommand", "getRequiredFiles"); + m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand"); exit(1); } } @@ -60,26 +85,24 @@ vector NormalizeSharedCommand::getRequiredFiles(){ NormalizeSharedCommand::NormalizeSharedCommand(string option) { try { - globaldata = GlobalData::getInstance(); abort = false; calledHelp = false; allLines = 1; - labels.clear(); //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} else { - //valid paramters for this command - string AlignArray[] = {"groups","label","method","makerelabund","outputdir","inputdir","norm"}; - vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); + vector myArray = setParameters(); OptionParser parser(option); map parameters = parser.getParameters(); + map::iterator it; ValidParameters validParameter; //check to make sure all parameters are valid for command - for (map::iterator it = parameters.begin(); it != parameters.end(); it++) { + for (it = parameters.begin(); it != parameters.end(); it++) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } @@ -87,20 +110,59 @@ NormalizeSharedCommand::NormalizeSharedCommand(string option) { vector tempOutNames; outputTypes["shared"] = tempOutNames; - //if the user changes the output directory command factory will send this info to us in the output parameter - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ - outputDir = ""; - outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("shared"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["shared"] = inputDir + it->second; } + } + + it = parameters.find("relabund"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["relabund"] = inputDir + it->second; } + } } - //make sure the user has already run the read.otu command - if ((globaldata->getSharedFile() == "") && (globaldata->getRelAbundFile() == "")) { - m->mothurOut("You must read a list and a group, shared or relabund file before you can use the normalize.shared command."); m->mothurOutEndLine(); abort = true; + sharedfile = validParameter.validFile(parameters, "shared", true); + if (sharedfile == "not open") { sharedfile = ""; abort = true; } + else if (sharedfile == "not found") { sharedfile = ""; } + else { format = "sharedfile"; inputfile = sharedfile; m->setSharedFile(sharedfile); } + + relabundfile = validParameter.validFile(parameters, "relabund", true); + if (relabundfile == "not open") { relabundfile = ""; abort = true; } + else if (relabundfile == "not found") { relabundfile = ""; } + else { format = "relabund"; inputfile = relabundfile; m->setRelAbundFile(relabundfile); } + + + if ((sharedfile == "") && (relabundfile == "")) { + //is there are current file available for any of these? + //give priority to shared, then list, then rabund, then sabund + //if there is a current shared file, use it + sharedfile = m->getSharedFile(); + if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); } + else { + relabundfile = m->getRelAbundFile(); + if (relabundfile != "") { inputfile = relabundfile; format = "relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); } + else { + m->mothurOut("No valid current files. You must provide a list, sabund, rabund, relabund or shared file."); m->mothurOutEndLine(); + abort = true; + } + } } - if ((globaldata->getSharedFile() != "") && (globaldata->getRelAbundFile() != "")) { - m->mothurOut("You may not use both a shared and relabund file as input for normalize.shared command."); m->mothurOutEndLine(); abort = true; - } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(inputfile); } + //check for optional parameter and set defaults @@ -112,18 +174,12 @@ NormalizeSharedCommand::NormalizeSharedCommand(string option) { else { allLines = 1; } } - //if the user has not specified any labels use the ones from read.otu - if (label == "") { - allLines = globaldata->allLines; - labels = globaldata->labels; - } - groups = validParameter.validFile(parameters, "groups", false); if (groups == "not found") { groups = ""; pickedGroups = false; } else { pickedGroups = true; m->splitAtDash(groups, Groups); - globaldata->Groups = Groups; + m->setGroups(Groups); } method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "totalgroup"; } @@ -133,15 +189,12 @@ NormalizeSharedCommand::NormalizeSharedCommand(string option) { if (temp == "not found") { norm = 0; //once you have read, set norm to smallest group number }else { - convert(temp, norm); + m->mothurConvert(temp, norm); if (norm < 0) { m->mothurOut("norm must be positive."); m->mothurOutEndLine(); abort=true; } } temp = validParameter.validFile(parameters, "makerelabund", false); if (temp == "") { temp = "f"; } makeRelabund = m->isTrue(temp); - - if ((globaldata->getFormat() != "sharedfile") && makeRelabund) { m->mothurOut("makerelabund can only be used with a shared file."); m->mothurOutEndLine(); } - } } @@ -150,35 +203,6 @@ NormalizeSharedCommand::NormalizeSharedCommand(string option) { exit(1); } } - -//********************************************************************************************************************** - -void NormalizeSharedCommand::help(){ - try { - m->mothurOut("The normalize.shared command can only be executed after a successful read.otu command of a list and group, shared or relabund file.\n"); - m->mothurOut("The normalize.shared command parameters are groups, method, norm, makerelabund and label. No parameters are required.\n"); - m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n"); - m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n"); - m->mothurOut("The method parameter allows you to select what method you would like to use to normalize. The options are totalgroup and zscore. We hope to add more ways to normalize in the future, suggestions are welcome!\n"); - m->mothurOut("The makerelabund parameter allows you to convert a shared file to a relabund file before you normalize. default=f.\n"); - m->mothurOut("The norm parameter allows you to number you would like to normalize to. By default this is set to the number of sequences in your smallest group.\n"); - m->mothurOut("The normalize.shared command should be in the following format: normalize.shared(groups=yourGroups, label=yourLabels).\n"); - m->mothurOut("Example normalize.shared(groups=A-B-C, scale=totalgroup).\n"); - m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n"); - m->mothurOut("The normalize.shared command outputs a .norm.shared file.\n"); - m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n"); - - } - catch(exception& e) { - m->errorOut(e, "NormalizeSharedCommand", "help"); - exit(1); - } -} - -//********************************************************************************************************************** - -NormalizeSharedCommand::~NormalizeSharedCommand(){} - //********************************************************************************************************************** int NormalizeSharedCommand::execute(){ @@ -186,18 +210,31 @@ int NormalizeSharedCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } - string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "norm.shared"; - ofstream out; - m->openOutputFile(outputFileName, out); + input = new InputData(inputfile, format); - if (globaldata->getFormat() == "sharedfile") { input = new InputData(globaldata->inputFileName, "sharedfile"); } - else { input = new InputData(globaldata->inputFileName, "relabund"); } - //you are reading a sharedfile and you do not want to make relabund - if ((globaldata->getFormat() == "sharedfile") && (!makeRelabund)) { + if ((format == "sharedfile") && (!makeRelabund)) { lookup = input->getSharedRAbundVectors(); string lastLabel = lookup[0]->getLabel(); + //look for groups whose numseqs is below norm and remove them, warning the user + if (norm != 0) { + m->clearGroups(); + vector mGroups; + vector temp; + for (int i = 0; i < lookup.size(); i++) { + if (lookup[i]->getNumSeqs() < norm) { + m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine(); + delete lookup[i]; + }else { + mGroups.push_back(lookup[i]->getGroup()); + temp.push_back(lookup[i]); + } + } + lookup = temp; + m->setGroups(mGroups); + } + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set processedLabels; set userLabels = labels; @@ -214,15 +251,16 @@ int NormalizeSharedCommand::execute(){ m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine(); } + //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } globaldata->Groups.clear(); out.close(); remove(outputFileName.c_str()); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0; } if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - normalize(lookup, out); + normalize(lookup); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); @@ -235,7 +273,7 @@ int NormalizeSharedCommand::execute(){ lookup = input->getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - normalize(lookup, out); + normalize(lookup); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); @@ -248,13 +286,13 @@ int NormalizeSharedCommand::execute(){ //prevent memory leak for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } - if (m->control_pressed) { outputTypes.clear(); globaldata->Groups.clear(); out.close(); remove(outputFileName.c_str()); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->clearGroups(); return 0; } //get next line to process lookup = input->getSharedRAbundVectors(); } - if (m->control_pressed) { outputTypes.clear(); globaldata->Groups.clear(); out.close(); remove(outputFileName.c_str()); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->clearGroups(); return 0; } //output error messages about any remaining user labels set::iterator it; @@ -276,7 +314,7 @@ int NormalizeSharedCommand::execute(){ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - normalize(lookup, out); + normalize(lookup); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } @@ -289,6 +327,24 @@ int NormalizeSharedCommand::execute(){ set processedLabels; set userLabels = labels; + //look for groups whose numseqs is below norm and remove them, warning the user + if (norm != 0) { + m->clearGroups(); + vector mGroups; + vector temp; + for (int i = 0; i < lookupFloat.size(); i++) { + if (lookupFloat[i]->getNumSeqs() < norm) { + m->mothurOut(lookupFloat[i]->getGroup() + " contains " + toString(lookupFloat[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine(); + delete lookupFloat[i]; + }else { + mGroups.push_back(lookupFloat[i]->getGroup()); + temp.push_back(lookupFloat[i]); + } + } + lookupFloat = temp; + m->setGroups(mGroups); + } + //set norm to smallest group number if (method == "totalgroup") { if (norm == 0) { @@ -304,12 +360,13 @@ int NormalizeSharedCommand::execute(){ //as long as you are not at the end of the file or done wih the lines you want while((lookupFloat[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } globaldata->Groups.clear(); out.close(); remove(outputFileName.c_str()); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } m->clearGroups(); return 0; } if(allLines == 1 || labels.count(lookupFloat[0]->getLabel()) == 1){ m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine(); - normalize(lookupFloat, out); + + normalize(lookupFloat); processedLabels.insert(lookupFloat[0]->getLabel()); userLabels.erase(lookupFloat[0]->getLabel()); @@ -320,9 +377,10 @@ int NormalizeSharedCommand::execute(){ for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } lookupFloat = input->getSharedRAbundFloatVectors(lastLabel); - m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine(); - normalize(lookupFloat, out); + m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine(); + + normalize(lookupFloat); processedLabels.insert(lookupFloat[0]->getLabel()); userLabels.erase(lookupFloat[0]->getLabel()); @@ -335,13 +393,13 @@ int NormalizeSharedCommand::execute(){ //prevent memory leak for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; lookupFloat[i] = NULL; } - if (m->control_pressed) { outputTypes.clear(); globaldata->Groups.clear(); out.close(); remove(outputFileName.c_str()); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->clearGroups(); return 0; } //get next line to process lookupFloat = input->getSharedRAbundFloatVectors(); } - if (m->control_pressed) { outputTypes.clear(); globaldata->Groups.clear(); out.close(); remove(outputFileName.c_str()); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->clearGroups(); return 0; } //output error messages about any remaining user labels set::iterator it; @@ -363,22 +421,22 @@ int NormalizeSharedCommand::execute(){ m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine(); - normalize(lookupFloat, out); + normalize(lookupFloat); for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } } //reset groups parameter - globaldata->Groups.clear(); + m->clearGroups(); delete input; - out.close(); - if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); return 0;} + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;} m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName); + //m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set shared file as new current sharedfile @@ -397,21 +455,37 @@ int NormalizeSharedCommand::execute(){ } //********************************************************************************************************************** -int NormalizeSharedCommand::normalize(vector& thisLookUp, ofstream& out){ +int NormalizeSharedCommand::normalize(vector& thisLookUp){ try { + //save mothurOut's binLabels to restore for next label + vector saveBinLabels = m->currentBinLabels; + if (pickedGroups) { eliminateZeroOTUS(thisLookUp); } + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile)); + variables["[distance]"] = thisLookUp[0]->getLabel(); + string outputFileName = getOutputFileName("shared",variables); + + ofstream out; + m->openOutputFile(outputFileName, out); + outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName); if (method == "totalgroup") { + + //save numSeqs since they will change as the data is normalized + vector sizes; + for (int i = 0; i < thisLookUp.size(); i++) { sizes.push_back(thisLookUp[i]->getNumSeqs()); } for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) { for (int i = 0; i < thisLookUp.size(); i++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { out.close(); return 0; } int abund = thisLookUp[i]->getAbundance(j); - float relabund = abund / (float) thisLookUp[i]->getNumSeqs(); + float relabund = abund / (float) sizes[i]; float newNorm = relabund * norm; //round to nearest int @@ -425,7 +499,7 @@ int NormalizeSharedCommand::normalize(vector& thisLookUp, o for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { out.close(); return 0; } //calc mean float mean = 0.0; @@ -456,12 +530,18 @@ int NormalizeSharedCommand::normalize(vector& thisLookUp, o eliminateZeroOTUS(thisLookUp); + + thisLookUp[0]->printHeaders(out); for (int i = 0; i < thisLookUp.size(); i++) { out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t'; thisLookUp[i]->print(out); } + out.close(); + + m->currentBinLabels = saveBinLabels; + return 0; } catch(exception& e) { @@ -471,21 +551,38 @@ int NormalizeSharedCommand::normalize(vector& thisLookUp, o } //********************************************************************************************************************** -int NormalizeSharedCommand::normalize(vector& thisLookUp, ofstream& out){ +int NormalizeSharedCommand::normalize(vector& thisLookUp){ try { + + //save mothurOut's binLabels to restore for next label + vector saveBinLabels = m->currentBinLabels; + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile)); + variables["[distance]"] = thisLookUp[0]->getLabel(); + string outputFileName = getOutputFileName("shared",variables); + ofstream out; + m->openOutputFile(outputFileName, out); + outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName); + + if (pickedGroups) { eliminateZeroOTUS(thisLookUp); } if (method == "totalgroup") { + //save numSeqs since they will change as the data is normalized + vector sizes; + for (int i = 0; i < thisLookUp.size(); i++) { sizes.push_back(thisLookUp[i]->getNumSeqs()); } + for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) { for (int i = 0; i < thisLookUp.size(); i++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { out.close(); return 0; } float abund = thisLookUp[i]->getAbundance(j); - float relabund = abund / (float) thisLookUp[i]->getNumSeqs(); + float relabund = abund / (float) sizes[i]; float newNorm = relabund * norm; thisLookUp[i]->set(j, newNorm, thisLookUp[i]->getGroup()); @@ -495,7 +592,7 @@ int NormalizeSharedCommand::normalize(vector& thisLook }else if (method == "zscore") { for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { out.close(); return 0; } //calc mean float mean = 0.0; @@ -523,11 +620,17 @@ int NormalizeSharedCommand::normalize(vector& thisLook eliminateZeroOTUS(thisLookUp); + thisLookUp[0]->printHeaders(out); + for (int i = 0; i < thisLookUp.size(); i++) { out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t'; thisLookUp[i]->print(out); } + out.close(); + + m->currentBinLabels = saveBinLabels; + return 0; } catch(exception& e) { @@ -548,6 +651,8 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector& thisl } //for each bin + vector newBinLabels; + string snumBins = toString(thislookup[0]->getNumBins()); for (int i = 0; i < thislookup[0]->getNumBins(); i++) { if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } @@ -562,12 +667,24 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector& thisl for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); } + //if there is a bin label use it otherwise make one + string binLabel = "Otu"; + string sbinNumber = toString(i+1); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { binLabel += "0"; } + } + binLabel += sbinNumber; + if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + + newBinLabels.push_back(binLabel); } } for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } thislookup = newLookup; + m->currentBinLabels = newBinLabels; return 0; @@ -590,6 +707,8 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector& } //for each bin + vector newBinLabels; + string snumBins = toString(thislookup[0]->getNumBins()); for (int i = 0; i < thislookup[0]->getNumBins(); i++) { if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } @@ -604,12 +723,24 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector& for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); } + //if there is a bin label use it otherwise make one + string binLabel = "Otu"; + string sbinNumber = toString(i+1); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { binLabel += "0"; } + } + binLabel += sbinNumber; + if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + + newBinLabels.push_back(binLabel); } } for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } thislookup = newLookup; + m->currentBinLabels = newBinLabels; return 0;