X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=indicatorcommand.cpp;h=239a4bc65d7d80ddf6322eca0188550068cf758f;hb=c3396974063d6efc5e5850ddf4ed8ab65cc94bb9;hp=9f96f35e95c5b5f74d58690e8d485cb0987114a8;hpb=3e2465c16d187247ce3befd29811c2d5dfc15ee8;p=mothur.git diff --git a/indicatorcommand.cpp b/indicatorcommand.cpp index 9f96f35..239a4bc 100644 --- a/indicatorcommand.cpp +++ b/indicatorcommand.cpp @@ -8,10 +8,12 @@ */ #include "indicatorcommand.h" +#include "sharedutilities.h" + //********************************************************************************************************************** vector IndicatorCommand::getValidParameters(){ try { - string Array[] = {"tree","shared","relabund","label","groups","outputdir","inputdir"}; + string Array[] = {"tree","shared","relabund","design","label","groups","outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); return myArray; } @@ -35,8 +37,7 @@ vector IndicatorCommand::getRequiredParameters(){ //********************************************************************************************************************** IndicatorCommand::IndicatorCommand(){ try { - abort = true; - //initialize outputTypes + abort = true; calledHelp = true; vector tempOutNames; outputTypes["tree"] = tempOutNames; outputTypes["summary"] = tempOutNames; @@ -62,14 +63,14 @@ vector IndicatorCommand::getRequiredFiles(){ IndicatorCommand::IndicatorCommand(string option) { try { globaldata = GlobalData::getInstance(); - abort = false; + abort = false; calledHelp = false; //allow user to run help - if(option == "help") { help(); abort = true; } + if(option == "help") { help(); abort = true; calledHelp = true; } else { //valid paramters for this command - string Array[] = {"tree","shared","relabund","groups","label","outputdir","inputdir"}; + string Array[] = {"tree","shared","design","relabund","groups","label","outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -118,6 +119,13 @@ IndicatorCommand::IndicatorCommand(string option) { if (path == "") { parameters["relabund"] = inputDir + it->second; } } + it = parameters.find("design"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["design"] = inputDir + it->second; } + } } outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } @@ -138,6 +146,10 @@ IndicatorCommand::IndicatorCommand(string option) { else if (relabundfile == "not found") { relabundfile = ""; } else { inputFileName = relabundfile; } + designfile = validParameter.validFile(parameters, "design", true); + if (designfile == "not open") { abort = true; } + else if (designfile == "not found") { designfile = ""; } + groups = validParameter.validFile(parameters, "groups", false); if (groups == "not found") { groups = ""; Groups.push_back("all"); } else { m->splitAtDash(groups, Groups); } @@ -164,8 +176,9 @@ void IndicatorCommand::help(){ m->mothurOut("The indicator command reads a shared or relabund file and a tree file, and outputs a .indicator.tre and .indicator.summary file. \n"); m->mothurOut("The new tree contains labels at each internal node. The label is the node number so you can relate the tree to the summary file.\n"); m->mothurOut("The summary file lists the indicator value for each OTU for each node.\n"); - m->mothurOut("The indicator command parameters are tree, groups, shared, relabund and label. The tree parameter is required as well as either shared or relabund.\n"); - m->mothurOut("The groups parameter allows you to specify which of the groups in your shared or relabund you would like analyzed. The groups may be entered separated by dashes.\n"); + m->mothurOut("The indicator command parameters are tree, groups, shared, relabund, design and label. The tree parameter is required as well as either shared or relabund.\n"); + m->mothurOut("The design parameter allows you to provide a design file to relate the tree to the shared or relabund file.\n"); + m->mothurOut("The groups parameter allows you to specify which of the groups in your shared or relabund you would like analyzed, or if you provide a design file the groups in your design file. The groups may be entered separated by dashes.\n"); m->mothurOut("The label parameter indicates at what distance your tree relates to the shared or relabund.\n"); m->mothurOut("The indicator command should be used in the following format: indicator(tree=test.tre, shared=test.shared, label=0.03)\n"); m->mothurOut("Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n\n"); @@ -185,20 +198,37 @@ IndicatorCommand::~IndicatorCommand(){} int IndicatorCommand::execute(){ try { - if (abort == true) { return 0; } + if (abort == true) { if (calledHelp) { return 0; } return 2; } + + //read designfile if given and set up globaldatas groups for read of sharedfiles + if (designfile != "") { + designMap = new GroupMap(designfile); + designMap->readDesignMap(); + + //fill Groups - checks for "all" and for any typo groups + SharedUtil* util = new SharedUtil(); + util->setGroups(Groups, designMap->namesOfGroups); + delete util; + + //loop through the Groups and fill Globaldata's Groups with the design file info + globaldata->Groups = designMap->getNamesSeqs(Groups); + } /***************************************************/ // use smart distancing to get right sharedRabund // /***************************************************/ if (sharedfile != "") { getShared(); - if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } + if (m->control_pressed) { if (designfile != "") { delete designMap; } for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } if (lookup[0] == NULL) { m->mothurOut("[ERROR] reading shared file."); m->mothurOutEndLine(); return 0; } }else { getSharedFloat(); - if (m->control_pressed) { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } return 0; } + if (m->control_pressed) { if (designfile != "") { delete designMap; } for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } return 0; } if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; } } + + //reset Globaldatas groups if needed + if (designfile != "") { globaldata->Groups = Groups; } /***************************************************/ // reading tree info // @@ -209,16 +239,33 @@ int IndicatorCommand::execute(){ globaldata->setGroupFile(groupfile); treeMap = new TreeMap(); bool mismatch = false; + for (int i = 0; i < globaldata->Treenames.size(); i++) { //sanity check - is this a group that is not in the sharedfile? - if (!(m->inUsersGroups(globaldata->Treenames[i], globaldata->gGroupmap->namesOfGroups))) { - m->mothurOut("[ERROR]: " + globaldata->Treenames[i] + " is not a group in your shared or relabund file."); m->mothurOutEndLine(); - mismatch = true; + if (designfile == "") { + if (!(m->inUsersGroups(globaldata->Treenames[i], globaldata->gGroupmap->namesOfGroups))) { + m->mothurOut("[ERROR]: " + globaldata->Treenames[i] + " is not a group in your shared or relabund file."); m->mothurOutEndLine(); + mismatch = true; + } + treeMap->addSeq(globaldata->Treenames[i], "Group1"); + }else{ + vector myGroups; myGroups.push_back(globaldata->Treenames[i]); + vector myNames = designMap->getNamesSeqs(myGroups); + + for(int k = 0; k < myNames.size(); k++) { + if (!(m->inUsersGroups(myNames[k], globaldata->gGroupmap->namesOfGroups))) { + m->mothurOut("[ERROR]: " + myNames[k] + " is not a group in your shared or relabund file."); m->mothurOutEndLine(); + mismatch = true; + } + } + treeMap->addSeq(globaldata->Treenames[i], "Group1"); } - treeMap->addSeq(globaldata->Treenames[i], "Group1"); } + + if ((designfile != "") && (globaldata->Treenames.size() != Groups.size())) { m->mothurOut("[ERROR]: You design file does not match your tree, aborting."); m->mothurOutEndLine(); mismatch = true; } if (mismatch) { //cleanup and exit + if (designfile != "") { delete designMap; } if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } delete treeMap; @@ -236,7 +283,8 @@ int IndicatorCommand::execute(){ delete read; - if (m->control_pressed) { + if (m->control_pressed) { + if (designfile != "") { delete designMap; } if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } for (int i = 0; i < T.size(); i++) { delete T[i]; } globaldata->gTree.clear(); delete globaldata->gTreemap; return 0; @@ -256,7 +304,8 @@ int IndicatorCommand::execute(){ for (int i = 0; i < T.size(); i++) { delete T[i]; } globaldata->gTree.clear(); - if (m->control_pressed) { + if (m->control_pressed) { + if (designfile != "") { delete designMap; } if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } delete outputTree; delete globaldata->gTreemap; return 0; @@ -267,6 +316,8 @@ int IndicatorCommand::execute(){ /***************************************************/ GetIndicatorSpecies(outputTree); + if (designfile != "") { delete designMap; } + if (m->control_pressed) { if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } @@ -301,7 +352,16 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ ofstream out; m->openOutputFile(outputFileName, out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); - + + int numBins = 0; + if (sharedfile != "") { numBins = lookup[0]->getNumBins(); } + else { numBins = lookupFloat[0]->getNumBins(); } + + //print headings + out << "TreeNode\t"; + for (int i = 0; i < numBins; i++) { out << "OTU-" << (i+1) << '\t'; } + out << endl; + string treeOutputDir = outputDir; if (outputDir == "") { treeOutputDir += m->hasPath(treefile); } string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + "indicator.tre"; @@ -318,7 +378,7 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ //you need the distances to leaf to decide grouping below //this will also set branch lengths if the tree does not include them - map distToLeaf = getLengthToLeaf(T); + map distToRoot = getDistToRoot(T); //for each node for (int i = T->getNumLeaves(); i < T->getNumNodes(); i++) { @@ -336,9 +396,8 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ //get nodes that will be a valid grouping //you are valid if you are not one of my descendants - //AND your distToLeaf is <= mine - //AND your distToLeaf is >= my smallest childs - //AND you were not added as part of a larger groupings + //AND your distToRoot is >= mine + //AND you were not added as part of a larger grouping. Largest nodes are added first. set groupsAlreadyAdded; //create a grouping with my grouping @@ -358,7 +417,9 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ for (int j = (T->getNumNodes()-1); j >= 0; j--) { - if ((descendantNodes[i].count(j) == 0) && (distToLeaf[j] <= distToLeaf[i]) && ((distToLeaf[j] >= distToLeaf[T->tree[i].getLChild()]) || (distToLeaf[j] >= distToLeaf[T->tree[i].getRChild()]))) { + + + if ((descendantNodes[i].count(j) == 0) && (distToRoot[j] >= distToRoot[i])) { vector subset; int count = 0; int doneCount = nodeToDescendants[j].size(); @@ -377,11 +438,8 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ } } - if (groupsAlreadyAdded.size() != lookup.size()) { cout << i << '\t' << groupsAlreadyAdded.size() << '\t' << lookup.size() << endl; m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); } - for (int k = 0; k < lookup.size(); k++) { - if (groupsAlreadyAdded.count(lookup[k]->getGroup()) == 0) { cout << lookup[k]->getGroup() << endl; } - } - + if (groupsAlreadyAdded.size() != lookup.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); } + indicatorValues = getValues(groupings); }else { @@ -389,9 +447,8 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ //get nodes that will be a valid grouping //you are valid if you are not one of my descendants - //AND your distToLeaf is <= mine - //AND your distToLeaf is >= my smallest childs - //AND you were not added as part of a larger grouping + //AND your distToRoot is >= mine + //AND you were not added as part of a larger grouping. Largest nodes are added first. set groupsAlreadyAdded; //create a grouping with my grouping @@ -410,7 +467,7 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ if (subset.size() != 0) { groupings.push_back(subset); } for (int j = (T->getNumNodes()-1); j >= 0; j--) { - if ((descendantNodes[i].count(j) == 0) && (distToLeaf[j] <= distToLeaf[i]) && ((distToLeaf[j] >= distToLeaf[T->tree[i].getLChild()]) || (distToLeaf[j] >= distToLeaf[T->tree[i].getRChild()]))) { + if ((descendantNodes[i].count(j) == 0) && (distToRoot[j] >= distToRoot[i])) { vector subset; int count = 0; int doneCount = nodeToDescendants[j].size(); @@ -576,9 +633,9 @@ vector IndicatorCommand::getValues(vector< vector >& } } //********************************************************************************************************************** -//you need the distances to leaf to decide groupings +//you need the distances to root to decide groupings //this will also set branch lengths if the tree does not include them -map IndicatorCommand::getLengthToLeaf(Tree*& T){ +map IndicatorCommand::getDistToRoot(Tree*& T){ try { map dists; @@ -587,13 +644,13 @@ map IndicatorCommand::getLengthToLeaf(Tree*& T){ if (T->tree[i].getBranchLength() > 0.0) { hasBranchLengths = true; break; } } - for (int i = 0; i < T->getNumNodes(); i++) { - - int lc = T->tree[i].getLChild(); - int rc = T->tree[i].getRChild(); - - //if you have no branch length, set it then calc - if (!hasBranchLengths) { + //set branchlengths if needed + if (!hasBranchLengths) { + for (int i = 0; i < T->getNumNodes(); i++) { + + int lc = T->tree[i].getLChild(); + int rc = T->tree[i].getRChild(); + if (lc == -1) { // you are a leaf //if you are a leaf set you priliminary length to 1.0, this may adjust later T->tree[i].setBranchLength(1.0); @@ -604,28 +661,32 @@ map IndicatorCommand::getLengthToLeaf(Tree*& T){ float rdist = dists[rc]; float greater = ldist; - if (rdist > greater) { greater = rdist; dists[i] = ldist + 1.0; } + if (rdist > greater) { greater = rdist; dists[i] = ldist + 1.0;} else { dists[i] = rdist + 1.0; } + //branch length = difference + 1 T->tree[lc].setBranchLength((abs(ldist-greater) + 1.0)); T->tree[rc].setBranchLength((abs(rdist-greater) + 1.0)); } - - }else{ - if (lc == -1) { dists[i] = T->tree[i].getBranchLength(); } - else { //smaller of my two children distances plus my branch length - //look at your children's length to leaf - float ldist = dists[lc]; - float rdist = dists[rc]; - - float smaller = ldist; - if (rdist < smaller) { smaller = rdist; } - - dists[i] = smaller + T->tree[i].getBranchLength(); + } + } + + dists.clear(); + + for (int i = 0; i < T->getNumNodes(); i++) { + + double sum = 0.0; + int index = i; + + while(T->tree[index].getParent() != -1){ + if (T->tree[index].getBranchLength() != -1) { + sum += abs(T->tree[index].getBranchLength()); } + index = T->tree[index].getParent(); } + dists[i] = sum; } return dists; @@ -647,8 +708,18 @@ set IndicatorCommand::getDescendantList(Tree*& T, int i, map temp; temp.insert(i); - names.insert(T->tree[i].getName()); nodes[i] = temp; + + if (designfile == "") { + names.insert(T->tree[i].getName()); + }else { + vector myGroup; myGroup.push_back(T->tree[i].getName()); + vector myReps = designMap->getNamesSeqs(myGroup); + for (int k = 0; k < myReps.size(); k++) { + names.insert(myReps[k]); + } + } + }else{ //your descedants are the combination of your childrens descendants names = descendants[lc]; nodes[i] = nodes[lc];