X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=indicatorcommand.cpp;h=2d1c63e21dbcd82df19957cb533d1e420e434fdc;hb=ae9c5b48da9047548f86fc84897926d9f5102312;hp=4c9baee4697e8cabbe313627196b780d8f26fec3;hpb=d9b668f68b99f92ecdc71dd8cd363cb4e27107f9;p=mothur.git diff --git a/indicatorcommand.cpp b/indicatorcommand.cpp index 4c9baee..2d1c63e 100644 --- a/indicatorcommand.cpp +++ b/indicatorcommand.cpp @@ -8,35 +8,58 @@ */ #include "indicatorcommand.h" +#include "sharedutilities.h" + + //********************************************************************************************************************** -vector IndicatorCommand::getValidParameters(){ +vector IndicatorCommand::setParameters(){ try { - string Array[] = {"tree","shared","relabund","label","groups","outputdir","inputdir"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters); + CommandParameter pdesign("design", "InputTypes", "", "", "TreeDesign", "TreeDesign", "none",false,false); parameters.push_back(pdesign); + CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none",false,false); parameters.push_back(pshared); + CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none",false,false); parameters.push_back(prelabund); + CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups); + CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); + CommandParameter ptree("tree", "InputTypes", "", "", "TreeDesign", "TreeDesign", "none",false,false); parameters.push_back(ptree); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { - m->errorOut(e, "IndicatorCommand", "getValidParameters"); + m->errorOut(e, "IndicatorCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** -vector IndicatorCommand::getRequiredParameters(){ +string IndicatorCommand::getHelpString(){ try { - string Array[] = {"label","tree"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); - return myArray; + string helpString = ""; + helpString += "The indicator command reads a shared or relabund file and a tree or design file, and outputs a .indicator.tre and .indicator.summary file. \n"; + helpString += "The new tree contains labels at each internal node. The label is the node number so you can relate the tree to the summary file.\n"; + helpString += "The summary file lists the indicator value for each OTU for each node.\n"; + helpString += "The indicator command parameters are tree, groups, shared, relabund, design and label. The tree parameter is required as well as either shared or relabund.\n"; + helpString += "The design parameter allows you to provide a design file to relate the tree to the shared or relabund file.\n"; + helpString += "The groups parameter allows you to specify which of the groups in your shared or relabund you would like analyzed, or if you provide a design file the groups in your design file. The groups may be entered separated by dashes.\n"; + helpString += "The label parameter indicates at what distance your tree relates to the shared or relabund.\n"; + helpString += "The iters parameter allows you to set number of randomization for the P value. The default is 1000."; + helpString += "The indicator command should be used in the following format: indicator(tree=test.tre, shared=test.shared, label=0.03)\n"; + helpString += "Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n"; + return helpString; } catch(exception& e) { - m->errorOut(e, "IndicatorCommand", "getRequiredParameters"); + m->errorOut(e, "IndicatorCommand", "getHelpString"); exit(1); } } + //********************************************************************************************************************** IndicatorCommand::IndicatorCommand(){ try { - abort = true; - //initialize outputTypes + abort = true; calledHelp = true; + setParameters(); vector tempOutNames; outputTypes["tree"] = tempOutNames; outputTypes["summary"] = tempOutNames; @@ -46,31 +69,17 @@ IndicatorCommand::IndicatorCommand(){ exit(1); } } - -//********************************************************************************************************************** -vector IndicatorCommand::getRequiredFiles(){ - try { - vector myArray; - return myArray; - } - catch(exception& e) { - m->errorOut(e, "IndicatorCommand", "getRequiredFiles"); - exit(1); - } -} //********************************************************************************************************************** IndicatorCommand::IndicatorCommand(string option) { try { - globaldata = GlobalData::getInstance(); - abort = false; + abort = false; calledHelp = false; //allow user to run help - if(option == "help") { help(); abort = true; } + if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} else { - //valid paramters for this command - string Array[] = {"tree","shared","relabund","groups","label","outputdir","inputdir"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + vector myArray = setParameters(); OptionParser parser(option); map parameters = parser.getParameters(); @@ -83,7 +92,11 @@ IndicatorCommand::IndicatorCommand(string option) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } - globaldata->newRead(); + m->runParse = true; + m->Groups.clear(); + m->namesOfGroups.clear(); + m->Treenames.clear(); + m->names.clear(); vector tempOutNames; outputTypes["tree"] = tempOutNames; @@ -118,40 +131,78 @@ IndicatorCommand::IndicatorCommand(string option) { if (path == "") { parameters["relabund"] = inputDir + it->second; } } + it = parameters.find("design"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["design"] = inputDir + it->second; } + } } outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } //check for required parameters treefile = validParameter.validFile(parameters, "tree", true); - if (treefile == "not open") { abort = true; } - else if (treefile == "not found") { treefile = ""; m->mothurOut("tree is a required parameter for the indicator command."); m->mothurOutEndLine(); abort = true; } - else { globaldata->setTreeFile(treefile); globaldata->setFormat("tree"); } + if (treefile == "not open") { treefile = ""; abort = true; } + else if (treefile == "not found") { treefile = ""; } + else { m->setTreeFile(treefile); } sharedfile = validParameter.validFile(parameters, "shared", true); if (sharedfile == "not open") { abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } - else { inputFileName = sharedfile; } + else { inputFileName = sharedfile; m->setSharedFile(sharedfile); } relabundfile = validParameter.validFile(parameters, "relabund", true); if (relabundfile == "not open") { abort = true; } else if (relabundfile == "not found") { relabundfile = ""; } - else { inputFileName = relabundfile; } + else { inputFileName = relabundfile; m->setRelAbundFile(relabundfile); } + + designfile = validParameter.validFile(parameters, "design", true); + if (designfile == "not open") { designfile = ""; abort = true; } + else if (designfile == "not found") { designfile = ""; } + else { m->setDesignFile(designfile); } groups = validParameter.validFile(parameters, "groups", false); - if (groups == "not found") { groups = ""; pickedGroups = false; } - else { - pickedGroups = true; - m->splitAtDash(groups, Groups); - globaldata->Groups = Groups; - } + if (groups == "not found") { groups = ""; Groups.push_back("all"); } + else { m->splitAtDash(groups, Groups); } + m->Groups = Groups; label = validParameter.validFile(parameters, "label", false); - if (label == "not found") { label = ""; m->mothurOut("You must provide a label to process."); m->mothurOutEndLine(); abort = true; } + if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; } + + string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; } + convert(temp, iters); - if ((relabundfile == "") && (sharedfile == "")) { m->mothurOut("You must provide either a shared or relabund file."); m->mothurOutEndLine(); abort = true; } + if ((relabundfile == "") && (sharedfile == "")) { + //is there are current file available for either of these? + //give priority to shared, then relabund + sharedfile = m->getSharedFile(); + if (sharedfile != "") { inputFileName = sharedfile; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); } + else { + relabundfile = m->getRelAbundFile(); + if (relabundfile != "") { inputFileName = relabundfile; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); } + else { + m->mothurOut("No valid current files. You must provide a shared or relabund."); m->mothurOutEndLine(); + abort = true; + } + } + } + + + if ((designfile == "") && (treefile == "")) { + treefile = m->getTreeFile(); + if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); } + else { + designfile = m->getDesignFile(); + if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); } + else { + m->mothurOut("[ERROR]: You must provide either a tree or design file."); m->mothurOutEndLine(); abort = true; + } + } + } - if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("You may not use both a shared and relabund file."); m->mothurOutEndLine(); abort = true; } + if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("[ERROR]: You may not use both a shared and relabund file."); m->mothurOutEndLine(); abort = true; } } } @@ -162,125 +213,147 @@ IndicatorCommand::IndicatorCommand(string option) { } //********************************************************************************************************************** -void IndicatorCommand::help(){ - try { - m->mothurOut("The indicator command reads a shared or relabund file and a tree file, and outputs a .indicator.tre and .indicator.summary file. \n"); - m->mothurOut("The new tree contains labels at each internal node. The label is the OTU number of the indicator OTU.\n"); - m->mothurOut("The summary file lists the indicator value for each OTU for each node.\n"); - m->mothurOut("The indicator command parameters are tree, groups, shared, relabund and label. The tree and label parameter are required as well as either shared or relabund.\n"); - m->mothurOut("The groups parameter allows you to specify which of the groups in your shared or relabund you would like analyzed. The groups may be entered separated by dashes.\n"); - m->mothurOut("The label parameter indicates at what distance your tree relates to the shared or relabund.\n"); - m->mothurOut("The indicator command should be used in the following format: indicator(tree=test.tre, shared=test.shared, label=0.03)\n"); - m->mothurOut("Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n\n"); - } - catch(exception& e) { - m->errorOut(e, "IndicatorCommand", "help"); - exit(1); - } -} - -//********************************************************************************************************************** - -IndicatorCommand::~IndicatorCommand(){} - -//********************************************************************************************************************** - int IndicatorCommand::execute(){ try { - if (abort == true) { return 0; } + if (abort == true) { if (calledHelp) { return 0; } return 2; } + + cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); + + //read designfile if given and set up globaldatas groups for read of sharedfiles + if (designfile != "") { + designMap = new GroupMap(designfile); + designMap->readDesignMap(); + + //fill Groups - checks for "all" and for any typo groups + SharedUtil* util = new SharedUtil(); + util->setGroups(Groups, designMap->namesOfGroups); + delete util; + + //loop through the Groups and fill Globaldata's Groups with the design file info + m->Groups = designMap->getNamesSeqs(Groups); + } /***************************************************/ // use smart distancing to get right sharedRabund // /***************************************************/ if (sharedfile != "") { getShared(); - if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } + if (m->control_pressed) { if (designfile != "") { delete designMap; } for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } if (lookup[0] == NULL) { m->mothurOut("[ERROR] reading shared file."); m->mothurOutEndLine(); return 0; } }else { getSharedFloat(); - if (m->control_pressed) { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } return 0; } + if (m->control_pressed) { if (designfile != "") { delete designMap; } for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } return 0; } if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; } } + + //reset Globaldatas groups if needed + if (designfile != "") { m->Groups = Groups; } /***************************************************/ // reading tree info // /***************************************************/ - string groupfile = ""; - Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap - - globaldata->setGroupFile(groupfile); - treeMap = new TreeMap(); - bool mismatch = false; - for (int i = 0; i < globaldata->Treenames.size(); i++) { - //sanity check - is this a group that is not in the sharedfile? - if (!(m->inUsersGroups(globaldata->Treenames[i], globaldata->gGroupmap->namesOfGroups))) { - m->mothurOut("[ERROR]: " + globaldata->Treenames[i] + " is not a group in your shared or relabund file."); m->mothurOutEndLine(); - mismatch = true; - } - treeMap->addSeq(globaldata->Treenames[i], "Group1"); - } + if (treefile != "") { + string groupfile = ""; + m->setTreeFile(treefile); + Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap + treeMap = new TreeMap(); + bool mismatch = false; - if (mismatch) { //cleanup and exit - if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } - else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } - delete treeMap; - return 0; - } + for (int i = 0; i < m->Treenames.size(); i++) { + //sanity check - is this a group that is not in the sharedfile? + if (designfile == "") { + if (!(m->inUsersGroups(m->Treenames[i], m->namesOfGroups))) { + m->mothurOut("[ERROR]: " + m->Treenames[i] + " is not a group in your shared or relabund file."); m->mothurOutEndLine(); + mismatch = true; + } + treeMap->addSeq(m->Treenames[i], "Group1"); + }else{ + vector myGroups; myGroups.push_back(m->Treenames[i]); + vector myNames = designMap->getNamesSeqs(myGroups); + + for(int k = 0; k < myNames.size(); k++) { + if (!(m->inUsersGroups(myNames[k], m->namesOfGroups))) { + m->mothurOut("[ERROR]: " + myNames[k] + " is not a group in your shared or relabund file."); m->mothurOutEndLine(); + mismatch = true; + } + } + treeMap->addSeq(m->Treenames[i], "Group1"); + } + } - globaldata->gTreemap = treeMap; - - read = new ReadNewickTree(treefile); - int readOk = read->read(); - - if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); globaldata->gTree.clear(); delete globaldata->gTreemap; delete read; return 0; } - - vector T = globaldata->gTree; - - delete read; - - if (m->control_pressed) { - if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } - else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } - for (int i = 0; i < T.size(); i++) { delete T[i]; } globaldata->gTree.clear(); delete globaldata->gTreemap; return 0; - } + if ((designfile != "") && (m->Treenames.size() != Groups.size())) { cout << Groups.size() << '\t' << m->Treenames.size() << endl; m->mothurOut("[ERROR]: You design file does not match your tree, aborting."); m->mothurOutEndLine(); mismatch = true; } + + if (mismatch) { //cleanup and exit + if (designfile != "") { delete designMap; } + if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } + else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } + delete treeMap; + return 0; + } + + read = new ReadNewickTree(treefile); + int readOk = read->read(treeMap); + + if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete treeMap; delete read; return 0; } + + vector T = read->getTrees(); + + delete read; - T[0]->assembleTree(); + if (m->control_pressed) { + if (designfile != "") { delete designMap; } + if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } + else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } + for (int i = 0; i < T.size(); i++) { delete T[i]; } delete treeMap; return 0; + } - /***************************************************/ - // create ouptut tree - respecting pickedGroups // - /***************************************************/ - Tree* outputTree = new Tree(globaldata->Groups.size()); - - if (pickedGroups) { - outputTree->getSubTree(T[0], globaldata->Groups); - outputTree->assembleTree(); - }else{ - outputTree->getCopy(T[0]); + T[0]->assembleTree(); + + /***************************************************/ + // create ouptut tree - respecting pickedGroups // + /***************************************************/ + Tree* outputTree = new Tree(m->Groups.size(), treeMap); + + outputTree->getSubTree(T[0], m->Groups); outputTree->assembleTree(); + + //no longer need original tree, we have output tree to use and label + for (int i = 0; i < T.size(); i++) { delete T[i]; } + + + if (m->control_pressed) { + if (designfile != "") { delete designMap; } + if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } + else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } + delete outputTree; delete treeMap; return 0; + } + + /***************************************************/ + // get indicator species values // + /***************************************************/ + GetIndicatorSpecies(outputTree); + delete outputTree; delete treeMap; + + }else { //run with design file only + //get indicator species + GetIndicatorSpecies(); } - //no longer need original tree, we have output tree to use and label - for (int i = 0; i < T.size(); i++) { delete T[i]; } globaldata->gTree.clear(); + if (designfile != "") { delete designMap; } + if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } + else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } - if (m->control_pressed) { - if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } - else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } - delete outputTree; delete globaldata->gTreemap; return 0; - } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } - /***************************************************/ - // get indicator species values // - /***************************************************/ - GetIndicatorSpecies(outputTree); - - if (m->control_pressed) { - if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } - else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } - delete outputTree; delete globaldata->gTreemap; return 0; + //set tree file as new current treefile + if (treefile != "") { + string current = ""; + itTypes = outputTypes.find("tree"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTreeFile(current); } + } } - m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } @@ -294,11 +367,153 @@ int IndicatorCommand::execute(){ } } //********************************************************************************************************************** +//divide shared or relabund file by groupings in the design file +//report all otu values to file +int IndicatorCommand::GetIndicatorSpecies(){ + try { + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(inputFileName); } + string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)) + "indicator.summary"; + outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); + + ofstream out; + m->openOutputFile(outputFileName, out); + out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); + + int numBins = 0; + if (sharedfile != "") { numBins = lookup[0]->getNumBins(); } + else { numBins = lookupFloat[0]->getNumBins(); } + + if (m->control_pressed) { out.close(); return 0; } + + /*****************************************************/ + //create vectors containing rabund info // + /*****************************************************/ + + vector indicatorValues; //size of numBins + vector pValues; + map< vector, vector > randomGroupingsMap; //maps location in groupings to location in groupings, ie, [0][0] -> [1][2]. This is so we don't have to actually move the sharedRabundVectors. + + if (sharedfile != "") { + vector< vector > groupings; + set groupsAlreadyAdded; + vector subset; + + //for each grouping + for (int i = 0; i < designMap->namesOfGroups.size(); i++) { + + for (int k = 0; k < lookup.size(); k++) { + //are you from this grouping? + if (designMap->getGroup(lookup[k]->getGroup()) == designMap->namesOfGroups[i]) { + subset.push_back(lookup[k]); + groupsAlreadyAdded.insert(lookup[k]->getGroup()); + } + } + if (subset.size() != 0) { groupings.push_back(subset); } + subset.clear(); + } + + if (groupsAlreadyAdded.size() != lookup.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); } + + indicatorValues = getValues(groupings, randomGroupingsMap); + + pValues.resize(indicatorValues.size(), 0); + for(int i=0;icontrol_pressed) { break; } + randomGroupingsMap = randomizeGroupings(groupings, lookup.size()); + vector randomIndicatorValues = getValues(groupings, randomGroupingsMap); + + for (int j = 0; j < indicatorValues.size(); j++) { + if (randomIndicatorValues[j] >= indicatorValues[j]) { pValues[j]++; } + } + } + + for (int i = 0; i < pValues.size(); i++) { pValues[i] /= (double)iters; } + + }else { + vector< vector > groupings; + set groupsAlreadyAdded; + vector subset; + + //for each grouping + for (int i = 0; i < designMap->namesOfGroups.size(); i++) { + for (int k = 0; k < lookupFloat.size(); k++) { + //are you from this grouping? + if (designMap->getGroup(lookupFloat[k]->getGroup()) == designMap->namesOfGroups[i]) { + subset.push_back(lookupFloat[k]); + groupsAlreadyAdded.insert(lookupFloat[k]->getGroup()); + } + } + if (subset.size() != 0) { groupings.push_back(subset); } + subset.clear(); + } + + if (groupsAlreadyAdded.size() != lookupFloat.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); } + + indicatorValues = getValues(groupings, randomGroupingsMap); + + pValues.resize(indicatorValues.size(), 0); + for(int i=0;icontrol_pressed) { break; } + randomGroupingsMap = randomizeGroupings(groupings, lookupFloat.size()); + vector randomIndicatorValues = getValues(groupings, randomGroupingsMap); + + for (int j = 0; j < indicatorValues.size(); j++) { + if (randomIndicatorValues[j] >= indicatorValues[j]) { pValues[j]++; } + } + } + + for (int i = 0; i < pValues.size(); i++) { pValues[i] /= (double)iters; } + } + + if (m->control_pressed) { out.close(); return 0; } + + + /******************************************************/ + //output indicator values to table form // + /*****************************************************/ + int indicatorOTU; + double maxValue, pvalue; maxValue=0.0; pvalue=0.0; + out << "OTU\tIndicator_Value\tpValue" << endl; + for (int j = 0; j < indicatorValues.size(); j++) { + + if (m->control_pressed) { out.close(); return 0; } + + out << (j+1) << '\t' << indicatorValues[j] << '\t'; + + if (pValues[j] > (1/(float)iters)) { out << pValues[j] << endl; } + else { out << "<" << (1/(float)iters) << endl; } + + if (maxValue < indicatorValues[j]) { + maxValue = indicatorValues[j]; + indicatorOTU = j; + } + } + + m->mothurOutEndLine(); m->mothurOut("Species\tIndicatorValue\tpValue\n"); + cout << "OTU" << indicatorOTU+1 << '\t' << maxValue << '\t'; + string pValueString = "<" + toString((1/(float)iters)); + if (pValues[indicatorOTU] > (1/(float)iters)) { pValueString = toString(pValues[indicatorOTU]); cout << pValues[indicatorOTU];} + else { cout << "<" << (1/(float)iters); } + m->mothurOutJustToLog("OTU" + toString(indicatorOTU+1) + "\t" + toString(maxValue) + "\t" + pValueString); + m->mothurOutEndLine(); + + out.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "IndicatorCommand", "GetIndicatorSpecies"); + exit(1); + } +} +//********************************************************************************************************************** //traverse tree finding indicator species values for each otu at each node //label node with otu number that has highest indicator value //report all otu values to file int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ try { + string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(inputFileName); } string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)) + "indicator.summary"; @@ -306,7 +521,18 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ ofstream out; m->openOutputFile(outputFileName, out); - out << "Node\tOTU#\tIndVal" << endl; + out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); + + int numBins = 0; + if (sharedfile != "") { numBins = lookup[0]->getNumBins(); } + else { numBins = lookupFloat[0]->getNumBins(); } + + //print headings + out << "TreeNode\t"; + for (int i = 0; i < numBins; i++) { out << "OTU" << (i+1) << "_IndValue" << '\t' << "pValue" << '\t'; } + out << endl; + + m->mothurOutEndLine(); m->mothurOut("Node\tSpecies\tIndicatorValue\tpValue\n"); string treeOutputDir = outputDir; if (outputDir == "") { treeOutputDir += m->hasPath(treefile); } @@ -324,7 +550,7 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ //you need the distances to leaf to decide grouping below //this will also set branch lengths if the tree does not include them - map distToLeaf = getLengthToLeaf(T); + map distToRoot = getDistToRoot(T); //for each node for (int i = T->getNumLeaves(); i < T->getNumNodes(); i++) { @@ -336,25 +562,38 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ /*****************************************************/ vector indicatorValues; //size of numBins + vector pValues; + map< vector, vector > randomGroupingsMap; //maps location in groupings to location in groupings, ie, [0][0] -> [1][2]. This is so we don't have to actually move the sharedRabundVectors. if (sharedfile != "") { vector< vector > groupings; - /*groupings.resize(1); - groupings[0].push_back(lookup[0]); - groupings[0].push_back(lookup[1]); - groupings[0].push_back(lookup[2]); - groupings[0].push_back(lookup[3]); - groupings[0].push_back(lookup[4]);*/ - //get nodes that will be a valid grouping //you are valid if you are not one of my descendants - //AND your distToLeaf is <= mine - //AND your distToLeaf is >= my smallest childs - //AND you were not added as part of a larger grouping + //AND your distToRoot is >= mine + //AND you were not added as part of a larger grouping. Largest nodes are added first. + set groupsAlreadyAdded; + //create a grouping with my grouping + vector subset; + int count = 0; + int doneCount = nodeToDescendants[i].size(); + for (int k = 0; k < lookup.size(); k++) { + //is this descendant of i + if ((nodeToDescendants[i].count(lookup[k]->getGroup()) != 0)) { + subset.push_back(lookup[k]); + groupsAlreadyAdded.insert(lookup[k]->getGroup()); + count++; + } + if (count == doneCount) { break; } //quit once you get the rabunds for this grouping + } + if (subset.size() != 0) { groupings.push_back(subset); } + + for (int j = (T->getNumNodes()-1); j >= 0; j--) { - if ((descendantNodes[i].count(j) == 0) && (distToLeaf[j] <= distToLeaf[i]) && ((distToLeaf[j] >= distToLeaf[T->tree[i].getLChild()]) || (distToLeaf[j] >= distToLeaf[T->tree[i].getRChild()]))) { + + + if ((descendantNodes[i].count(j) == 0) && (distToRoot[j] >= distToRoot[i])) { vector subset; int count = 0; int doneCount = nodeToDescendants[j].size(); @@ -373,21 +612,49 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ } } - if (groupsAlreadyAdded.size() != lookup.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); } + if (groupsAlreadyAdded.size() != lookup.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); } + + indicatorValues = getValues(groupings, randomGroupingsMap); - indicatorValues = getValues(groupings); + pValues.resize(indicatorValues.size(), 0); + for(int i=0;icontrol_pressed) { break; } + randomGroupingsMap = randomizeGroupings(groupings, lookup.size()); + vector randomIndicatorValues = getValues(groupings, randomGroupingsMap); + + for (int j = 0; j < indicatorValues.size(); j++) { + if (randomIndicatorValues[j] >= indicatorValues[j]) { pValues[j]++; } + } + } + + for (int i = 0; i < pValues.size(); i++) { pValues[i] /= (double)iters; } }else { vector< vector > groupings; //get nodes that will be a valid grouping //you are valid if you are not one of my descendants - //AND your distToLeaf is <= mine - //AND your distToLeaf is >= my smallest childs - //AND you were not added as part of a larger grouping + //AND your distToRoot is >= mine + //AND you were not added as part of a larger grouping. Largest nodes are added first. + set groupsAlreadyAdded; + //create a grouping with my grouping + vector subset; + int count = 0; + int doneCount = nodeToDescendants[i].size(); + for (int k = 0; k < lookupFloat.size(); k++) { + //is this descendant of i + if ((nodeToDescendants[i].count(lookupFloat[k]->getGroup()) != 0)) { + subset.push_back(lookupFloat[k]); + groupsAlreadyAdded.insert(lookupFloat[k]->getGroup()); + count++; + } + if (count == doneCount) { break; } //quit once you get the rabunds for this grouping + } + if (subset.size() != 0) { groupings.push_back(subset); } + for (int j = (T->getNumNodes()-1); j >= 0; j--) { - if ((descendantNodes[i].count(j) == 0) && (distToLeaf[j] <= distToLeaf[i]) && ((distToLeaf[j] >= distToLeaf[T->tree[i].getLChild()]) || (distToLeaf[j] >= distToLeaf[T->tree[i].getRChild()]))) { + if ((descendantNodes[i].count(j) == 0) && (distToRoot[j] >= distToRoot[i])) { vector subset; int count = 0; int doneCount = nodeToDescendants[j].size(); @@ -408,35 +675,58 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ if (groupsAlreadyAdded.size() != lookupFloat.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); } - indicatorValues = getValues(groupings); + indicatorValues = getValues(groupings, randomGroupingsMap); + + pValues.resize(indicatorValues.size(), 0); + for(int i=0;icontrol_pressed) { break; } + randomGroupingsMap = randomizeGroupings(groupings, lookup.size()); + vector randomIndicatorValues = getValues(groupings, randomGroupingsMap); + + for (int j = 0; j < indicatorValues.size(); j++) { + if (randomIndicatorValues[j] >= indicatorValues[j]) { pValues[j]++; } + } + } + + for (int i = 0; i < pValues.size(); i++) { pValues[i] /= (double)iters; } } if (m->control_pressed) { out.close(); return 0; } + /******************************************************/ //output indicator values to table form + label tree // /*****************************************************/ - vector indicatorOTUs; - float largestValue = 0.0; + out << (i+1) << '\t'; + int indicatorOTU; + double maxValue, pvalue; maxValue=0.0; pvalue=0.0; for (int j = 0; j < indicatorValues.size(); j++) { if (m->control_pressed) { out.close(); return 0; } - out << (i+1) << '\t' << (j+1) << '\t' << indicatorValues[j] << endl; - - //show no favortism - if (indicatorValues[j] > largestValue) { - largestValue = indicatorValues[j]; - indicatorOTUs.clear(); - indicatorOTUs.push_back(j+1); - }else if (indicatorValues[j] == largestValue) { - indicatorOTUs.push_back(j+1); + if (pValues[j] < (1/(float)iters)) { + out << indicatorValues[j] << '\t' << '<' << (1/(float)iters) << '\t'; + }else { + out << indicatorValues[j] << '\t' << pValues[j] << '\t'; } - random_shuffle(indicatorOTUs.begin(), indicatorOTUs.end()); - - T->tree[i].setLabel(indicatorOTUs[0]); + if (maxValue < indicatorValues[j]) { + maxValue = indicatorValues[j]; + indicatorOTU = j; + } } + out << endl; + + T->tree[i].setLabel((i+1)); + + cout << i+1 << "\tOTU" << indicatorOTU+1 << '\t' << maxValue << '\t'; + string pValueString = "<" + toString((1/(float)iters)); + if (pValues[indicatorOTU] > (1/(float)iters)) { pValueString = toString(pValues[indicatorOTU]); cout << pValues[indicatorOTU];} + else { cout << "<" << (1/(float)iters); } + m->mothurOutJustToLog(toString(i) + "\tOTU" + toString(indicatorOTU+1) + "\t" + toString(maxValue) + "\t" + pValueString); + m->mothurOutEndLine(); + } out.close(); @@ -456,9 +746,10 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ } } //********************************************************************************************************************** -vector IndicatorCommand::getValues(vector< vector >& groupings){ +vector IndicatorCommand::getValues(vector< vector >& groupings, map< vector, vector > groupingsMap){ try { vector values; + map< vector, vector >::iterator it; //for each otu for (int i = 0; i < groupings[0][0]->getNumBins(); i++) { @@ -468,16 +759,27 @@ vector IndicatorCommand::getValues(vector< vector terms; float AijDenominator = 0.0; vector Bij; + //get overall abundance of each grouping for (int j = 0; j < groupings.size(); j++) { float totalAbund = 0; int numNotZero = 0; for (int k = 0; k < groupings[j].size(); k++) { - totalAbund += groupings[j][k]->getAbundance(i); - if (groupings[j][k]->getAbundance(i) != 0) { numNotZero++; } + vector temp; temp.push_back(j); temp.push_back(k); + it = groupingsMap.find(temp); + + if (it == groupingsMap.end()) { //this one didnt get moved + totalAbund += groupings[j][k]->getAbundance(i); + if (groupings[j][k]->getAbundance(i) != 0.0) { numNotZero++; } + }else { + totalAbund += groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i); + if (groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i) != 0.0) { numNotZero++; } + } + } + //mean abundance float Aij = (totalAbund / (float) groupings[j].size()); terms.push_back(Aij); @@ -489,7 +791,7 @@ vector IndicatorCommand::getValues(vector< vector IndicatorCommand::getValues(vector< vector IndicatorCommand::getValues(vector< vector >& groupings){ +vector IndicatorCommand::getValues(vector< vector >& groupings, map< vector, vector > groupingsMap){ try { vector values; @@ -518,6 +820,8 @@ vector IndicatorCommand::getValues(vector< vector >& cout << groupings[j][k]->getGroup() << endl; } }*/ + map< vector, vector >::iterator it; + //for each otu for (int i = 0; i < groupings[0][0]->getNumBins(); i++) { vector terms; @@ -529,11 +833,20 @@ vector IndicatorCommand::getValues(vector< vector >& int totalAbund = 0.0; int numNotZero = 0; for (int k = 0; k < groupings[j].size(); k++) { - totalAbund += groupings[j][k]->getAbundance(i); - if (groupings[j][k]->getAbundance(i) != 0.0) { numNotZero++; } + vector temp; temp.push_back(j); temp.push_back(k); + it = groupingsMap.find(temp); - } + if (it == groupingsMap.end()) { //this one didnt get moved + totalAbund += groupings[j][k]->getAbundance(i); + if (groupings[j][k]->getAbundance(i) != 0.0) { numNotZero++; } + }else { + totalAbund += groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i); + if (groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i) != 0.0) { numNotZero++; } + } + } + + //mean abundance float Aij = (totalAbund / (float) groupings[j].size()); terms.push_back(Aij); @@ -545,7 +858,7 @@ vector IndicatorCommand::getValues(vector< vector >& float maxIndVal = 0.0; for (int j = 0; j < terms.size(); j++) { - float thisAij = (terms[j] / AijDenominator); + float thisAij = (terms[j] / AijDenominator); //relative abundance float thisValue = thisAij * Bij[j] * 100.0; //save largest @@ -563,45 +876,60 @@ vector IndicatorCommand::getValues(vector< vector >& } } //********************************************************************************************************************** -//you need the distances to leaf to decide groupings +//you need the distances to root to decide groupings //this will also set branch lengths if the tree does not include them -map IndicatorCommand::getLengthToLeaf(Tree*& T){ +map IndicatorCommand::getDistToRoot(Tree*& T){ try { map dists; - for (int i = 0; i < T->getNumNodes(); i++) { - - int lc = T->tree[i].getLChild(); - int rc = T->tree[i].getRChild(); - - //if you have no branch length, set it then calc - if (T->tree[i].getBranchLength() <= 0.0) { + bool hasBranchLengths = false; + for (int i = 0; i < T->getNumNodes(); i++) { + if (T->tree[i].getBranchLength() > 0.0) { hasBranchLengths = true; break; } + } + + //set branchlengths if needed + if (!hasBranchLengths) { + for (int i = 0; i < T->getNumNodes(); i++) { + + int lc = T->tree[i].getLChild(); + int rc = T->tree[i].getRChild(); if (lc == -1) { // you are a leaf //if you are a leaf set you priliminary length to 1.0, this may adjust later T->tree[i].setBranchLength(1.0); - dists[i] = 0.0; + dists[i] = 1.0; }else{ // you are an internal node //look at your children's length to leaf float ldist = dists[lc]; float rdist = dists[rc]; - float greater; - if (rdist > greater) { greater = rdist; } - else { greater = ldist; } + float greater = ldist; + if (rdist > greater) { greater = rdist; dists[i] = ldist + 1.0;} + else { dists[i] = rdist + 1.0; } + //branch length = difference + 1 T->tree[lc].setBranchLength((abs(ldist-greater) + 1.0)); T->tree[rc].setBranchLength((abs(rdist-greater) + 1.0)); - - dists[i] = dists[lc] + (abs(ldist-greater) + 1.0); } - - }else{ - if (lc == -1) { dists[i] = 0.0; } - else { dists[i] = dists[lc] + T->tree[lc].getBranchLength(); } } + } + + dists.clear(); + + for (int i = 0; i < T->getNumNodes(); i++) { + + double sum = 0.0; + int index = i; + while(T->tree[index].getParent() != -1){ + if (T->tree[index].getBranchLength() != -1) { + sum += abs(T->tree[index].getBranchLength()); + } + index = T->tree[index].getParent(); + } + + dists[i] = sum; } return dists; @@ -623,8 +951,18 @@ set IndicatorCommand::getDescendantList(Tree*& T, int i, map temp; temp.insert(i); - names.insert(T->tree[i].getName()); nodes[i] = temp; + + if (designfile == "") { + names.insert(T->tree[i].getName()); + }else { + vector myGroup; myGroup.push_back(T->tree[i].getName()); + vector myReps = designMap->getNamesSeqs(myGroup); + for (int k = 0; k < myReps.size(); k++) { + names.insert(myReps[k]); + } + } + }else{ //your descedants are the combination of your childrens descendants names = descendants[lc]; nodes[i] = nodes[lc]; @@ -634,6 +972,8 @@ set IndicatorCommand::getDescendantList(Tree*& T, int i, map::iterator itNum = nodes[rc].begin(); itNum != nodes[rc].end(); itNum++) { nodes[i].insert(*itNum); } + //you are your own descendant + nodes[i].insert(i); } return names; @@ -650,6 +990,8 @@ int IndicatorCommand::getShared(){ lookup = input->getSharedRAbundVectors(); string lastLabel = lookup[0]->getLabel(); + if (label == "") { label = lastLabel; delete input; return 0; } + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set labels; labels.insert(label); set processedLabels; @@ -724,6 +1066,8 @@ int IndicatorCommand::getSharedFloat(){ lookupFloat = input->getSharedRAbundFloatVectors(); string lastLabel = lookupFloat[0]->getLabel(); + if (label == "") { label = lastLabel; delete input; return 0; } + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set labels; labels.insert(label); set processedLabels; @@ -791,7 +1135,76 @@ int IndicatorCommand::getSharedFloat(){ m->errorOut(e, "IndicatorCommand", "getShared"); exit(1); } -} +} +//********************************************************************************************************************** +//swap groups between groupings, in essence randomizing the second column of the design file +map< vector, vector > IndicatorCommand::randomizeGroupings(vector< vector >& groupings, int numLookupGroups){ + try { + + map< vector, vector > randomGroupings; + + for (int i = 0; i < numLookupGroups; i++) { + if (m->control_pressed) {break;} + + //get random groups to swap to switch with + //generate random int between 0 and groupings.size()-1 + int z = m->getRandomIndex(groupings.size()-1); + int x = m->getRandomIndex(groupings.size()-1); + int a = m->getRandomIndex(groupings[z].size()-1); + int b = m->getRandomIndex(groupings[x].size()-1); + //cout << i << '\t' << z << '\t' << x << '\t' << a << '\t' << b << endl; + //if ((z < 0) || (z > 1) || x<0 || x>1 || a <0 || a>groupings[z].size()-1 || b<0 || b>groupings[x].size()-1) { cout << "probelm" << i << '\t' << z << '\t' << x << '\t' << a << '\t' << b << endl; } + + vector from; + vector to; + + from.push_back(z); from.push_back(a); + to.push_back(x); to.push_back(b); + + randomGroupings[from] = to; + } + //cout << "done" << endl; + return randomGroupings; + } + catch(exception& e) { + m->errorOut(e, "IndicatorCommand", "randomizeGroupings"); + exit(1); + } +} +//********************************************************************************************************************** +//swap groups between groupings, in essence randomizing the second column of the design file +map< vector, vector > IndicatorCommand::randomizeGroupings(vector< vector >& groupings, int numLookupGroups){ + try { + + map< vector, vector > randomGroupings; + + for (int i = 0; i < numLookupGroups; i++) { + + //get random groups to swap to switch with + //generate random int between 0 and groupings.size()-1 + int z = m->getRandomIndex(groupings.size()-1); + int x = m->getRandomIndex(groupings.size()-1); + int a = m->getRandomIndex(groupings[z].size()-1); + int b = m->getRandomIndex(groupings[x].size()-1); + //cout << i << '\t' << z << '\t' << x << '\t' << a << '\t' << b << endl; + + vector from; + vector to; + + from.push_back(z); from.push_back(a); + to.push_back(x); to.push_back(b); + + randomGroupings[from] = to; + } + + return randomGroupings; + } + catch(exception& e) { + m->errorOut(e, "IndicatorCommand", "randomizeGroupings"); + exit(1); + } +} + /*****************************************************************/