5 * Created by westcott on 11/12/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "indicatorcommand.h"
11 #include "sharedutilities.h"
14 //**********************************************************************************************************************
15 vector<string> IndicatorCommand::setParameters(){
17 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
18 CommandParameter pdesign("design", "InputTypes", "", "", "TreeDesign", "TreeDesign", "none",false,false); parameters.push_back(pdesign);
19 CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none",false,false); parameters.push_back(pshared);
20 CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none",false,false); parameters.push_back(prelabund);
21 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
23 CommandParameter ptree("tree", "InputTypes", "", "", "TreeDesign", "TreeDesign", "none",false,false); parameters.push_back(ptree);
24 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27 vector<string> myArray;
28 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
32 m->errorOut(e, "IndicatorCommand", "setParameters");
36 //**********************************************************************************************************************
37 string IndicatorCommand::getHelpString(){
39 string helpString = "";
40 helpString += "The indicator command reads a shared or relabund file and a tree or design file, and outputs a .indicator.tre and .indicator.summary file. \n";
41 helpString += "The new tree contains labels at each internal node. The label is the node number so you can relate the tree to the summary file.\n";
42 helpString += "The summary file lists the indicator value for each OTU for each node.\n";
43 helpString += "The indicator command parameters are tree, groups, shared, relabund, design and label. The tree parameter is required as well as either shared or relabund.\n";
44 helpString += "The design parameter allows you to provide a design file to relate the tree to the shared or relabund file.\n";
45 helpString += "The groups parameter allows you to specify which of the groups in your shared or relabund you would like analyzed, or if you provide a design file the groups in your design file. The groups may be entered separated by dashes.\n";
46 helpString += "The label parameter indicates at what distance your tree relates to the shared or relabund.\n";
47 helpString += "The iters parameter allows you to set number of randomization for the P value. The default is 1000.";
48 helpString += "The indicator command should be used in the following format: indicator(tree=test.tre, shared=test.shared, label=0.03)\n";
49 helpString += "Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n";
53 m->errorOut(e, "IndicatorCommand", "getHelpString");
58 //**********************************************************************************************************************
59 IndicatorCommand::IndicatorCommand(){
61 abort = true; calledHelp = true;
63 vector<string> tempOutNames;
64 outputTypes["tree"] = tempOutNames;
65 outputTypes["summary"] = tempOutNames;
68 m->errorOut(e, "IndicatorCommand", "IndicatorCommand");
72 //**********************************************************************************************************************
73 IndicatorCommand::IndicatorCommand(string option) {
75 abort = false; calledHelp = false;
77 //allow user to run help
78 if(option == "help") { help(); abort = true; calledHelp = true; }
79 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
82 vector<string> myArray = setParameters();
84 OptionParser parser(option);
85 map<string, string> parameters = parser.getParameters();
87 ValidParameters validParameter;
88 map<string, string>::iterator it;
90 //check to make sure all parameters are valid for command
91 for (it = parameters.begin(); it != parameters.end(); it++) {
92 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
97 m->namesOfGroups.clear();
101 vector<string> tempOutNames;
102 outputTypes["tree"] = tempOutNames;
103 outputTypes["summary"] = tempOutNames;
105 //if the user changes the input directory command factory will send this info to us in the output parameter
106 string inputDir = validParameter.validFile(parameters, "inputdir", false);
107 if (inputDir == "not found"){ inputDir = ""; }
110 it = parameters.find("tree");
111 //user has given a template file
112 if(it != parameters.end()){
113 path = m->hasPath(it->second);
114 //if the user has not given a path then, add inputdir. else leave path alone.
115 if (path == "") { parameters["tree"] = inputDir + it->second; }
118 it = parameters.find("shared");
119 //user has given a template file
120 if(it != parameters.end()){
121 path = m->hasPath(it->second);
122 //if the user has not given a path then, add inputdir. else leave path alone.
123 if (path == "") { parameters["shared"] = inputDir + it->second; }
126 it = parameters.find("relabund");
127 //user has given a template file
128 if(it != parameters.end()){
129 path = m->hasPath(it->second);
130 //if the user has not given a path then, add inputdir. else leave path alone.
131 if (path == "") { parameters["relabund"] = inputDir + it->second; }
134 it = parameters.find("design");
135 //user has given a template file
136 if(it != parameters.end()){
137 path = m->hasPath(it->second);
138 //if the user has not given a path then, add inputdir. else leave path alone.
139 if (path == "") { parameters["design"] = inputDir + it->second; }
143 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
145 //check for required parameters
146 treefile = validParameter.validFile(parameters, "tree", true);
147 if (treefile == "not open") { treefile = ""; abort = true; }
148 else if (treefile == "not found") { treefile = ""; }
149 else { m->setTreeFile(treefile); }
151 sharedfile = validParameter.validFile(parameters, "shared", true);
152 if (sharedfile == "not open") { abort = true; }
153 else if (sharedfile == "not found") { sharedfile = ""; }
154 else { inputFileName = sharedfile; m->setSharedFile(sharedfile); }
156 relabundfile = validParameter.validFile(parameters, "relabund", true);
157 if (relabundfile == "not open") { abort = true; }
158 else if (relabundfile == "not found") { relabundfile = ""; }
159 else { inputFileName = relabundfile; m->setRelAbundFile(relabundfile); }
161 designfile = validParameter.validFile(parameters, "design", true);
162 if (designfile == "not open") { designfile = ""; abort = true; }
163 else if (designfile == "not found") { designfile = ""; }
164 else { m->setDesignFile(designfile); }
166 groups = validParameter.validFile(parameters, "groups", false);
167 if (groups == "not found") { groups = ""; Groups.push_back("all"); }
168 else { m->splitAtDash(groups, Groups); }
171 label = validParameter.validFile(parameters, "label", false);
172 if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
174 string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
175 convert(temp, iters);
177 if ((relabundfile == "") && (sharedfile == "")) {
178 //is there are current file available for either of these?
179 //give priority to shared, then relabund
180 sharedfile = m->getSharedFile();
181 if (sharedfile != "") { inputFileName = sharedfile; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
183 relabundfile = m->getRelAbundFile();
184 if (relabundfile != "") { inputFileName = relabundfile; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
186 m->mothurOut("No valid current files. You must provide a shared or relabund."); m->mothurOutEndLine();
193 if ((designfile == "") && (treefile == "")) {
194 treefile = m->getTreeFile();
195 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
197 designfile = m->getDesignFile();
198 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
200 m->mothurOut("[ERROR]: You must provide either a tree or design file."); m->mothurOutEndLine(); abort = true;
205 if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("[ERROR]: You may not use both a shared and relabund file."); m->mothurOutEndLine(); abort = true; }
209 catch(exception& e) {
210 m->errorOut(e, "IndicatorCommand", "IndicatorCommand");
214 //**********************************************************************************************************************
216 int IndicatorCommand::execute(){
219 if (abort == true) { if (calledHelp) { return 0; } return 2; }
221 cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
223 //read designfile if given and set up globaldatas groups for read of sharedfiles
224 if (designfile != "") {
225 designMap = new GroupMap(designfile);
226 designMap->readDesignMap();
228 //fill Groups - checks for "all" and for any typo groups
229 SharedUtil* util = new SharedUtil();
230 util->setGroups(Groups, designMap->namesOfGroups);
233 //loop through the Groups and fill Globaldata's Groups with the design file info
234 m->Groups = designMap->getNamesSeqs(Groups);
237 /***************************************************/
238 // use smart distancing to get right sharedRabund //
239 /***************************************************/
240 if (sharedfile != "") {
242 if (m->control_pressed) { if (designfile != "") { delete designMap; } for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
243 if (lookup[0] == NULL) { m->mothurOut("[ERROR] reading shared file."); m->mothurOutEndLine(); return 0; }
246 if (m->control_pressed) { if (designfile != "") { delete designMap; } for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } return 0; }
247 if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; }
250 //reset Globaldatas groups if needed
251 if (designfile != "") { m->Groups = Groups; }
253 /***************************************************/
254 // reading tree info //
255 /***************************************************/
256 if (treefile != "") {
257 string groupfile = "";
258 m->setTreeFile(treefile);
259 Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
260 treeMap = new TreeMap();
261 bool mismatch = false;
263 for (int i = 0; i < m->Treenames.size(); i++) {
264 //sanity check - is this a group that is not in the sharedfile?
265 if (designfile == "") {
266 if (!(m->inUsersGroups(m->Treenames[i], m->namesOfGroups))) {
267 m->mothurOut("[ERROR]: " + m->Treenames[i] + " is not a group in your shared or relabund file."); m->mothurOutEndLine();
270 treeMap->addSeq(m->Treenames[i], "Group1");
272 vector<string> myGroups; myGroups.push_back(m->Treenames[i]);
273 vector<string> myNames = designMap->getNamesSeqs(myGroups);
275 for(int k = 0; k < myNames.size(); k++) {
276 if (!(m->inUsersGroups(myNames[k], m->namesOfGroups))) {
277 m->mothurOut("[ERROR]: " + myNames[k] + " is not a group in your shared or relabund file."); m->mothurOutEndLine();
281 treeMap->addSeq(m->Treenames[i], "Group1");
285 if ((designfile != "") && (m->Treenames.size() != Groups.size())) { cout << Groups.size() << '\t' << m->Treenames.size() << endl; m->mothurOut("[ERROR]: You design file does not match your tree, aborting."); m->mothurOutEndLine(); mismatch = true; }
287 if (mismatch) { //cleanup and exit
288 if (designfile != "") { delete designMap; }
289 if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } }
290 else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } }
295 read = new ReadNewickTree(treefile);
296 int readOk = read->read(treeMap);
298 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete treeMap; delete read; return 0; }
300 vector<Tree*> T = read->getTrees();
304 if (m->control_pressed) {
305 if (designfile != "") { delete designMap; }
306 if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } }
307 else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } }
308 for (int i = 0; i < T.size(); i++) { delete T[i]; } delete treeMap; return 0;
311 T[0]->assembleTree();
313 /***************************************************/
314 // create ouptut tree - respecting pickedGroups //
315 /***************************************************/
316 Tree* outputTree = new Tree(m->Groups.size(), treeMap);
318 outputTree->getSubTree(T[0], m->Groups);
319 outputTree->assembleTree();
321 //no longer need original tree, we have output tree to use and label
322 for (int i = 0; i < T.size(); i++) { delete T[i]; }
325 if (m->control_pressed) {
326 if (designfile != "") { delete designMap; }
327 if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } }
328 else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } }
329 delete outputTree; delete treeMap; return 0;
332 /***************************************************/
333 // get indicator species values //
334 /***************************************************/
335 GetIndicatorSpecies(outputTree);
336 delete outputTree; delete treeMap;
338 }else { //run with design file only
339 //get indicator species
340 GetIndicatorSpecies();
343 if (designfile != "") { delete designMap; }
344 if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } }
345 else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } }
347 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
349 //set tree file as new current treefile
350 if (treefile != "") {
352 itTypes = outputTypes.find("tree");
353 if (itTypes != outputTypes.end()) {
354 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTreeFile(current); }
357 m->mothurOutEndLine();
358 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
359 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
360 m->mothurOutEndLine();
364 catch(exception& e) {
365 m->errorOut(e, "IndicatorCommand", "execute");
369 //**********************************************************************************************************************
370 //divide shared or relabund file by groupings in the design file
371 //report all otu values to file
372 int IndicatorCommand::GetIndicatorSpecies(){
374 string thisOutputDir = outputDir;
375 if (outputDir == "") { thisOutputDir += m->hasPath(inputFileName); }
376 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)) + "indicator.summary";
377 outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
380 m->openOutputFile(outputFileName, out);
381 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
384 if (sharedfile != "") { numBins = lookup[0]->getNumBins(); }
385 else { numBins = lookupFloat[0]->getNumBins(); }
387 if (m->control_pressed) { out.close(); return 0; }
389 /*****************************************************/
390 //create vectors containing rabund info //
391 /*****************************************************/
393 vector<float> indicatorValues; //size of numBins
394 vector<float> pValues;
395 map< vector<int>, vector<int> > randomGroupingsMap; //maps location in groupings to location in groupings, ie, [0][0] -> [1][2]. This is so we don't have to actually move the sharedRabundVectors.
397 if (sharedfile != "") {
398 vector< vector<SharedRAbundVector*> > groupings;
399 set<string> groupsAlreadyAdded;
400 vector<SharedRAbundVector*> subset;
403 for (int i = 0; i < designMap->namesOfGroups.size(); i++) {
405 for (int k = 0; k < lookup.size(); k++) {
406 //are you from this grouping?
407 if (designMap->getGroup(lookup[k]->getGroup()) == designMap->namesOfGroups[i]) {
408 subset.push_back(lookup[k]);
409 groupsAlreadyAdded.insert(lookup[k]->getGroup());
412 if (subset.size() != 0) { groupings.push_back(subset); }
416 if (groupsAlreadyAdded.size() != lookup.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); }
418 indicatorValues = getValues(groupings, randomGroupingsMap);
420 pValues.resize(indicatorValues.size(), 0);
421 for(int i=0;i<iters;i++){
422 if (m->control_pressed) { break; }
423 randomGroupingsMap = randomizeGroupings(groupings, lookup.size());
424 vector<float> randomIndicatorValues = getValues(groupings, randomGroupingsMap);
426 for (int j = 0; j < indicatorValues.size(); j++) {
427 if (randomIndicatorValues[j] >= indicatorValues[j]) { pValues[j]++; }
431 for (int i = 0; i < pValues.size(); i++) { pValues[i] /= (double)iters; }
434 vector< vector<SharedRAbundFloatVector*> > groupings;
435 set<string> groupsAlreadyAdded;
436 vector<SharedRAbundFloatVector*> subset;
439 for (int i = 0; i < designMap->namesOfGroups.size(); i++) {
440 for (int k = 0; k < lookupFloat.size(); k++) {
441 //are you from this grouping?
442 if (designMap->getGroup(lookupFloat[k]->getGroup()) == designMap->namesOfGroups[i]) {
443 subset.push_back(lookupFloat[k]);
444 groupsAlreadyAdded.insert(lookupFloat[k]->getGroup());
447 if (subset.size() != 0) { groupings.push_back(subset); }
451 if (groupsAlreadyAdded.size() != lookupFloat.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); }
453 indicatorValues = getValues(groupings, randomGroupingsMap);
455 pValues.resize(indicatorValues.size(), 0);
456 for(int i=0;i<iters;i++){
457 if (m->control_pressed) { break; }
458 randomGroupingsMap = randomizeGroupings(groupings, lookupFloat.size());
459 vector<float> randomIndicatorValues = getValues(groupings, randomGroupingsMap);
461 for (int j = 0; j < indicatorValues.size(); j++) {
462 if (randomIndicatorValues[j] >= indicatorValues[j]) { pValues[j]++; }
466 for (int i = 0; i < pValues.size(); i++) { pValues[i] /= (double)iters; }
469 if (m->control_pressed) { out.close(); return 0; }
472 /******************************************************/
473 //output indicator values to table form //
474 /*****************************************************/
476 double maxValue, pvalue; maxValue=0.0; pvalue=0.0;
477 out << "OTU\tIndicator_Value\tpValue" << endl;
478 for (int j = 0; j < indicatorValues.size(); j++) {
480 if (m->control_pressed) { out.close(); return 0; }
482 out << (j+1) << '\t' << indicatorValues[j] << '\t';
484 if (pValues[j] > (1/(float)iters)) { out << pValues[j] << endl; }
485 else { out << "<" << (1/(float)iters) << endl; }
487 if (maxValue < indicatorValues[j]) {
488 maxValue = indicatorValues[j];
493 m->mothurOutEndLine(); m->mothurOut("Species\tIndicatorValue\tpValue\n");
494 cout << "OTU" << indicatorOTU+1 << '\t' << maxValue << '\t';
495 string pValueString = "<" + toString((1/(float)iters));
496 if (pValues[indicatorOTU] > (1/(float)iters)) { pValueString = toString(pValues[indicatorOTU]); cout << pValues[indicatorOTU];}
497 else { cout << "<" << (1/(float)iters); }
498 m->mothurOutJustToLog("OTU" + toString(indicatorOTU+1) + "\t" + toString(maxValue) + "\t" + pValueString);
499 m->mothurOutEndLine();
505 catch(exception& e) {
506 m->errorOut(e, "IndicatorCommand", "GetIndicatorSpecies");
510 //**********************************************************************************************************************
511 //traverse tree finding indicator species values for each otu at each node
512 //label node with otu number that has highest indicator value
513 //report all otu values to file
514 int IndicatorCommand::GetIndicatorSpecies(Tree*& T){
517 string thisOutputDir = outputDir;
518 if (outputDir == "") { thisOutputDir += m->hasPath(inputFileName); }
519 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)) + "indicator.summary";
520 outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
523 m->openOutputFile(outputFileName, out);
524 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
527 if (sharedfile != "") { numBins = lookup[0]->getNumBins(); }
528 else { numBins = lookupFloat[0]->getNumBins(); }
532 for (int i = 0; i < numBins; i++) { out << "OTU" << (i+1) << "_IndValue" << '\t' << "pValue" << '\t'; }
535 m->mothurOutEndLine(); m->mothurOut("Node\tSpecies\tIndicatorValue\tpValue\n");
537 string treeOutputDir = outputDir;
538 if (outputDir == "") { treeOutputDir += m->hasPath(treefile); }
539 string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + "indicator.tre";
542 //create a map from tree node index to names of descendants, save time later to know which sharedRabund you need
543 map<int, set<string> > nodeToDescendants;
544 map<int, set<int> > descendantNodes;
545 for (int i = 0; i < T->getNumNodes(); i++) {
546 if (m->control_pressed) { return 0; }
548 nodeToDescendants[i] = getDescendantList(T, i, nodeToDescendants, descendantNodes);
551 //you need the distances to leaf to decide grouping below
552 //this will also set branch lengths if the tree does not include them
553 map<int, float> distToRoot = getDistToRoot(T);
556 for (int i = T->getNumLeaves(); i < T->getNumNodes(); i++) {
558 if (m->control_pressed) { out.close(); return 0; }
560 /*****************************************************/
561 //create vectors containing rabund info //
562 /*****************************************************/
564 vector<float> indicatorValues; //size of numBins
565 vector<float> pValues;
566 map< vector<int>, vector<int> > randomGroupingsMap; //maps location in groupings to location in groupings, ie, [0][0] -> [1][2]. This is so we don't have to actually move the sharedRabundVectors.
568 if (sharedfile != "") {
569 vector< vector<SharedRAbundVector*> > groupings;
571 //get nodes that will be a valid grouping
572 //you are valid if you are not one of my descendants
573 //AND your distToRoot is >= mine
574 //AND you were not added as part of a larger grouping. Largest nodes are added first.
576 set<string> groupsAlreadyAdded;
577 //create a grouping with my grouping
578 vector<SharedRAbundVector*> subset;
580 int doneCount = nodeToDescendants[i].size();
581 for (int k = 0; k < lookup.size(); k++) {
582 //is this descendant of i
583 if ((nodeToDescendants[i].count(lookup[k]->getGroup()) != 0)) {
584 subset.push_back(lookup[k]);
585 groupsAlreadyAdded.insert(lookup[k]->getGroup());
588 if (count == doneCount) { break; } //quit once you get the rabunds for this grouping
590 if (subset.size() != 0) { groupings.push_back(subset); }
593 for (int j = (T->getNumNodes()-1); j >= 0; j--) {
596 if ((descendantNodes[i].count(j) == 0) && (distToRoot[j] >= distToRoot[i])) {
597 vector<SharedRAbundVector*> subset;
599 int doneCount = nodeToDescendants[j].size();
600 for (int k = 0; k < lookup.size(); k++) {
601 //is this descendant of j, and we didn't already add this as part of a larger grouping
602 if ((nodeToDescendants[j].count(lookup[k]->getGroup()) != 0) && (groupsAlreadyAdded.count(lookup[k]->getGroup()) == 0)) {
603 subset.push_back(lookup[k]);
604 groupsAlreadyAdded.insert(lookup[k]->getGroup());
607 if (count == doneCount) { break; } //quit once you get the rabunds for this grouping
610 //if subset.size == 0 then the node was added as part of a larger grouping
611 if (subset.size() != 0) { groupings.push_back(subset); }
615 if (groupsAlreadyAdded.size() != lookup.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); }
617 indicatorValues = getValues(groupings, randomGroupingsMap);
619 pValues.resize(indicatorValues.size(), 0);
620 for(int i=0;i<iters;i++){
621 if (m->control_pressed) { break; }
622 randomGroupingsMap = randomizeGroupings(groupings, lookup.size());
623 vector<float> randomIndicatorValues = getValues(groupings, randomGroupingsMap);
625 for (int j = 0; j < indicatorValues.size(); j++) {
626 if (randomIndicatorValues[j] >= indicatorValues[j]) { pValues[j]++; }
630 for (int i = 0; i < pValues.size(); i++) { pValues[i] /= (double)iters; }
633 vector< vector<SharedRAbundFloatVector*> > groupings;
635 //get nodes that will be a valid grouping
636 //you are valid if you are not one of my descendants
637 //AND your distToRoot is >= mine
638 //AND you were not added as part of a larger grouping. Largest nodes are added first.
640 set<string> groupsAlreadyAdded;
641 //create a grouping with my grouping
642 vector<SharedRAbundFloatVector*> subset;
644 int doneCount = nodeToDescendants[i].size();
645 for (int k = 0; k < lookupFloat.size(); k++) {
646 //is this descendant of i
647 if ((nodeToDescendants[i].count(lookupFloat[k]->getGroup()) != 0)) {
648 subset.push_back(lookupFloat[k]);
649 groupsAlreadyAdded.insert(lookupFloat[k]->getGroup());
652 if (count == doneCount) { break; } //quit once you get the rabunds for this grouping
654 if (subset.size() != 0) { groupings.push_back(subset); }
656 for (int j = (T->getNumNodes()-1); j >= 0; j--) {
657 if ((descendantNodes[i].count(j) == 0) && (distToRoot[j] >= distToRoot[i])) {
658 vector<SharedRAbundFloatVector*> subset;
660 int doneCount = nodeToDescendants[j].size();
661 for (int k = 0; k < lookupFloat.size(); k++) {
662 //is this descendant of j, and we didn't already add this as part of a larger grouping
663 if ((nodeToDescendants[j].count(lookupFloat[k]->getGroup()) != 0) && (groupsAlreadyAdded.count(lookupFloat[k]->getGroup()) == 0)) {
664 subset.push_back(lookupFloat[k]);
665 groupsAlreadyAdded.insert(lookupFloat[k]->getGroup());
668 if (count == doneCount) { break; } //quit once you get the rabunds for this grouping
671 //if subset.size == 0 then the node was added as part of a larger grouping
672 if (subset.size() != 0) { groupings.push_back(subset); }
676 if (groupsAlreadyAdded.size() != lookupFloat.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); }
678 indicatorValues = getValues(groupings, randomGroupingsMap);
680 pValues.resize(indicatorValues.size(), 0);
681 for(int i=0;i<iters;i++){
683 if (m->control_pressed) { break; }
684 randomGroupingsMap = randomizeGroupings(groupings, lookup.size());
685 vector<float> randomIndicatorValues = getValues(groupings, randomGroupingsMap);
687 for (int j = 0; j < indicatorValues.size(); j++) {
688 if (randomIndicatorValues[j] >= indicatorValues[j]) { pValues[j]++; }
692 for (int i = 0; i < pValues.size(); i++) { pValues[i] /= (double)iters; }
695 if (m->control_pressed) { out.close(); return 0; }
698 /******************************************************/
699 //output indicator values to table form + label tree //
700 /*****************************************************/
701 out << (i+1) << '\t';
703 double maxValue, pvalue; maxValue=0.0; pvalue=0.0;
704 for (int j = 0; j < indicatorValues.size(); j++) {
706 if (m->control_pressed) { out.close(); return 0; }
708 if (pValues[j] < (1/(float)iters)) {
709 out << indicatorValues[j] << '\t' << '<' << (1/(float)iters) << '\t';
711 out << indicatorValues[j] << '\t' << pValues[j] << '\t';
714 if (maxValue < indicatorValues[j]) {
715 maxValue = indicatorValues[j];
721 T->tree[i].setLabel((i+1));
723 cout << i+1 << "\tOTU" << indicatorOTU+1 << '\t' << maxValue << '\t';
724 string pValueString = "<" + toString((1/(float)iters));
725 if (pValues[indicatorOTU] > (1/(float)iters)) { pValueString = toString(pValues[indicatorOTU]); cout << pValues[indicatorOTU];}
726 else { cout << "<" << (1/(float)iters); }
727 m->mothurOutJustToLog(toString(i) + "\tOTU" + toString(indicatorOTU+1) + "\t" + toString(maxValue) + "\t" + pValueString);
728 m->mothurOutEndLine();
735 m->openOutputFile(outputTreeFileName, outTree);
736 outputNames.push_back(outputTreeFileName); outputTypes["tree"].push_back(outputTreeFileName);
738 T->print(outTree, "both");
743 catch(exception& e) {
744 m->errorOut(e, "IndicatorCommand", "GetIndicatorSpecies");
748 //**********************************************************************************************************************
749 vector<float> IndicatorCommand::getValues(vector< vector<SharedRAbundFloatVector*> >& groupings, map< vector<int>, vector<int> > groupingsMap){
751 vector<float> values;
752 map< vector<int>, vector<int> >::iterator it;
755 for (int i = 0; i < groupings[0][0]->getNumBins(); i++) {
757 if (m->control_pressed) { return values; }
760 float AijDenominator = 0.0;
763 //get overall abundance of each grouping
764 for (int j = 0; j < groupings.size(); j++) {
766 float totalAbund = 0;
768 for (int k = 0; k < groupings[j].size(); k++) {
769 vector<int> temp; temp.push_back(j); temp.push_back(k);
770 it = groupingsMap.find(temp);
772 if (it == groupingsMap.end()) { //this one didnt get moved
773 totalAbund += groupings[j][k]->getAbundance(i);
774 if (groupings[j][k]->getAbundance(i) != 0.0) { numNotZero++; }
776 totalAbund += groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i);
777 if (groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i) != 0.0) { numNotZero++; }
783 float Aij = (totalAbund / (float) groupings[j].size());
784 terms.push_back(Aij);
786 //percentage of sites represented
787 Bij.push_back(numNotZero / (float) groupings[j].size());
789 AijDenominator += Aij;
792 float maxIndVal = 0.0;
793 for (int j = 0; j < terms.size(); j++) {
794 float thisAij = (terms[j] / AijDenominator); //relative abundance
795 float thisValue = thisAij * Bij[j] * 100.0;
798 if (thisValue > maxIndVal) { maxIndVal = thisValue; }
801 values.push_back(maxIndVal);
806 catch(exception& e) {
807 m->errorOut(e, "IndicatorCommand", "getValues");
811 //**********************************************************************************************************************
812 //same as above, just data type difference
813 vector<float> IndicatorCommand::getValues(vector< vector<SharedRAbundVector*> >& groupings, map< vector<int>, vector<int> > groupingsMap){
815 vector<float> values;
817 /*for (int j = 0; j < groupings.size(); j++) {
818 cout << "grouping " << j << endl;
819 for (int k = 0; k < groupings[j].size(); k++) {
820 cout << groupings[j][k]->getGroup() << endl;
823 map< vector<int>, vector<int> >::iterator it;
826 for (int i = 0; i < groupings[0][0]->getNumBins(); i++) {
828 float AijDenominator = 0.0;
830 //get overall abundance of each grouping
831 for (int j = 0; j < groupings.size(); j++) {
833 int totalAbund = 0.0;
835 for (int k = 0; k < groupings[j].size(); k++) {
836 vector<int> temp; temp.push_back(j); temp.push_back(k);
837 it = groupingsMap.find(temp);
839 if (it == groupingsMap.end()) { //this one didnt get moved
840 totalAbund += groupings[j][k]->getAbundance(i);
841 if (groupings[j][k]->getAbundance(i) != 0.0) { numNotZero++; }
843 totalAbund += groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i);
844 if (groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i) != 0.0) { numNotZero++; }
850 float Aij = (totalAbund / (float) groupings[j].size());
851 terms.push_back(Aij);
853 //percentage of sites represented
854 Bij.push_back(numNotZero / (float) groupings[j].size());
856 AijDenominator += Aij;
859 float maxIndVal = 0.0;
860 for (int j = 0; j < terms.size(); j++) {
861 float thisAij = (terms[j] / AijDenominator); //relative abundance
862 float thisValue = thisAij * Bij[j] * 100.0;
865 if (thisValue > maxIndVal) { maxIndVal = thisValue; }
868 values.push_back(maxIndVal);
873 catch(exception& e) {
874 m->errorOut(e, "IndicatorCommand", "getValues");
878 //**********************************************************************************************************************
879 //you need the distances to root to decide groupings
880 //this will also set branch lengths if the tree does not include them
881 map<int, float> IndicatorCommand::getDistToRoot(Tree*& T){
883 map<int, float> dists;
885 bool hasBranchLengths = false;
886 for (int i = 0; i < T->getNumNodes(); i++) {
887 if (T->tree[i].getBranchLength() > 0.0) { hasBranchLengths = true; break; }
890 //set branchlengths if needed
891 if (!hasBranchLengths) {
892 for (int i = 0; i < T->getNumNodes(); i++) {
894 int lc = T->tree[i].getLChild();
895 int rc = T->tree[i].getRChild();
897 if (lc == -1) { // you are a leaf
898 //if you are a leaf set you priliminary length to 1.0, this may adjust later
899 T->tree[i].setBranchLength(1.0);
901 }else{ // you are an internal node
902 //look at your children's length to leaf
903 float ldist = dists[lc];
904 float rdist = dists[rc];
906 float greater = ldist;
907 if (rdist > greater) { greater = rdist; dists[i] = ldist + 1.0;}
908 else { dists[i] = rdist + 1.0; }
911 //branch length = difference + 1
912 T->tree[lc].setBranchLength((abs(ldist-greater) + 1.0));
913 T->tree[rc].setBranchLength((abs(rdist-greater) + 1.0));
920 for (int i = 0; i < T->getNumNodes(); i++) {
925 while(T->tree[index].getParent() != -1){
926 if (T->tree[index].getBranchLength() != -1) {
927 sum += abs(T->tree[index].getBranchLength());
929 index = T->tree[index].getParent();
937 catch(exception& e) {
938 m->errorOut(e, "IndicatorCommand", "getLengthToLeaf");
942 //**********************************************************************************************************************
943 set<string> IndicatorCommand::getDescendantList(Tree*& T, int i, map<int, set<string> > descendants, map<int, set<int> >& nodes){
947 set<string>::iterator it;
949 int lc = T->tree[i].getLChild();
950 int rc = T->tree[i].getRChild();
952 if (lc == -1) { //you are a leaf your only descendant is yourself
953 set<int> temp; temp.insert(i);
956 if (designfile == "") {
957 names.insert(T->tree[i].getName());
959 vector<string> myGroup; myGroup.push_back(T->tree[i].getName());
960 vector<string> myReps = designMap->getNamesSeqs(myGroup);
961 for (int k = 0; k < myReps.size(); k++) {
962 names.insert(myReps[k]);
966 }else{ //your descedants are the combination of your childrens descendants
967 names = descendants[lc];
968 nodes[i] = nodes[lc];
969 for (it = descendants[rc].begin(); it != descendants[rc].end(); it++) {
972 for (set<int>::iterator itNum = nodes[rc].begin(); itNum != nodes[rc].end(); itNum++) {
973 nodes[i].insert(*itNum);
975 //you are your own descendant
981 catch(exception& e) {
982 m->errorOut(e, "IndicatorCommand", "getDescendantList");
986 //**********************************************************************************************************************
987 int IndicatorCommand::getShared(){
989 InputData* input = new InputData(sharedfile, "sharedfile");
990 lookup = input->getSharedRAbundVectors();
991 string lastLabel = lookup[0]->getLabel();
993 if (label == "") { label = lastLabel; delete input; return 0; }
995 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
996 set<string> labels; labels.insert(label);
997 set<string> processedLabels;
998 set<string> userLabels = labels;
1000 //as long as you are not at the end of the file or done wih the lines you want
1001 while((lookup[0] != NULL) && (userLabels.size() != 0)) {
1002 if (m->control_pressed) { delete input; return 0; }
1004 if(labels.count(lookup[0]->getLabel()) == 1){
1005 processedLabels.insert(lookup[0]->getLabel());
1006 userLabels.erase(lookup[0]->getLabel());
1010 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
1011 string saveLabel = lookup[0]->getLabel();
1013 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
1014 lookup = input->getSharedRAbundVectors(lastLabel);
1016 processedLabels.insert(lookup[0]->getLabel());
1017 userLabels.erase(lookup[0]->getLabel());
1019 //restore real lastlabel to save below
1020 lookup[0]->setLabel(saveLabel);
1024 lastLabel = lookup[0]->getLabel();
1026 //get next line to process
1027 //prevent memory leak
1028 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
1029 lookup = input->getSharedRAbundVectors();
1033 if (m->control_pressed) { delete input; return 0; }
1035 //output error messages about any remaining user labels
1036 set<string>::iterator it;
1037 bool needToRun = false;
1038 for (it = userLabels.begin(); it != userLabels.end(); it++) {
1039 m->mothurOut("Your file does not include the label " + *it);
1040 if (processedLabels.count(lastLabel) != 1) {
1041 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
1044 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
1048 //run last label if you need to
1049 if (needToRun == true) {
1050 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
1051 lookup = input->getSharedRAbundVectors(lastLabel);
1057 catch(exception& e) {
1058 m->errorOut(e, "IndicatorCommand", "getShared");
1062 //**********************************************************************************************************************
1063 int IndicatorCommand::getSharedFloat(){
1065 InputData* input = new InputData(relabundfile, "relabund");
1066 lookupFloat = input->getSharedRAbundFloatVectors();
1067 string lastLabel = lookupFloat[0]->getLabel();
1069 if (label == "") { label = lastLabel; delete input; return 0; }
1071 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
1072 set<string> labels; labels.insert(label);
1073 set<string> processedLabels;
1074 set<string> userLabels = labels;
1076 //as long as you are not at the end of the file or done wih the lines you want
1077 while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
1079 if (m->control_pressed) { delete input; return 0; }
1081 if(labels.count(lookupFloat[0]->getLabel()) == 1){
1082 processedLabels.insert(lookupFloat[0]->getLabel());
1083 userLabels.erase(lookupFloat[0]->getLabel());
1087 if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
1088 string saveLabel = lookupFloat[0]->getLabel();
1090 for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; }
1091 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
1093 processedLabels.insert(lookupFloat[0]->getLabel());
1094 userLabels.erase(lookupFloat[0]->getLabel());
1096 //restore real lastlabel to save below
1097 lookupFloat[0]->setLabel(saveLabel);
1101 lastLabel = lookupFloat[0]->getLabel();
1103 //get next line to process
1104 //prevent memory leak
1105 for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; }
1106 lookupFloat = input->getSharedRAbundFloatVectors();
1110 if (m->control_pressed) { delete input; return 0; }
1112 //output error messages about any remaining user labels
1113 set<string>::iterator it;
1114 bool needToRun = false;
1115 for (it = userLabels.begin(); it != userLabels.end(); it++) {
1116 m->mothurOut("Your file does not include the label " + *it);
1117 if (processedLabels.count(lastLabel) != 1) {
1118 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
1121 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
1125 //run last label if you need to
1126 if (needToRun == true) {
1127 for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i] != NULL) { delete lookupFloat[i]; } }
1128 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
1134 catch(exception& e) {
1135 m->errorOut(e, "IndicatorCommand", "getShared");
1139 //**********************************************************************************************************************
1140 //swap groups between groupings, in essence randomizing the second column of the design file
1141 map< vector<int>, vector<int> > IndicatorCommand::randomizeGroupings(vector< vector<SharedRAbundVector*> >& groupings, int numLookupGroups){
1144 map< vector<int>, vector<int> > randomGroupings;
1146 for (int i = 0; i < numLookupGroups; i++) {
1147 if (m->control_pressed) {break;}
1149 //get random groups to swap to switch with
1150 //generate random int between 0 and groupings.size()-1
1151 int z = m->getRandomIndex(groupings.size()-1);
1152 int x = m->getRandomIndex(groupings.size()-1);
1153 int a = m->getRandomIndex(groupings[z].size()-1);
1154 int b = m->getRandomIndex(groupings[x].size()-1);
1155 //cout << i << '\t' << z << '\t' << x << '\t' << a << '\t' << b << endl;
1156 //if ((z < 0) || (z > 1) || x<0 || x>1 || a <0 || a>groupings[z].size()-1 || b<0 || b>groupings[x].size()-1) { cout << "probelm" << i << '\t' << z << '\t' << x << '\t' << a << '\t' << b << endl; }
1161 from.push_back(z); from.push_back(a);
1162 to.push_back(x); to.push_back(b);
1164 randomGroupings[from] = to;
1166 //cout << "done" << endl;
1167 return randomGroupings;
1169 catch(exception& e) {
1170 m->errorOut(e, "IndicatorCommand", "randomizeGroupings");
1174 //**********************************************************************************************************************
1175 //swap groups between groupings, in essence randomizing the second column of the design file
1176 map< vector<int>, vector<int> > IndicatorCommand::randomizeGroupings(vector< vector<SharedRAbundFloatVector*> >& groupings, int numLookupGroups){
1179 map< vector<int>, vector<int> > randomGroupings;
1181 for (int i = 0; i < numLookupGroups; i++) {
1183 //get random groups to swap to switch with
1184 //generate random int between 0 and groupings.size()-1
1185 int z = m->getRandomIndex(groupings.size()-1);
1186 int x = m->getRandomIndex(groupings.size()-1);
1187 int a = m->getRandomIndex(groupings[z].size()-1);
1188 int b = m->getRandomIndex(groupings[x].size()-1);
1189 //cout << i << '\t' << z << '\t' << x << '\t' << a << '\t' << b << endl;
1194 from.push_back(z); from.push_back(a);
1195 to.push_back(x); to.push_back(b);
1197 randomGroupings[from] = to;
1200 return randomGroupings;
1202 catch(exception& e) {
1203 m->errorOut(e, "IndicatorCommand", "randomizeGroupings");
1208 /*****************************************************************/