5 * Created by westcott on 11/12/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "indicatorcommand.h"
11 #include "sharedutilities.h"
14 //**********************************************************************************************************************
15 vector<string> IndicatorCommand::setParameters(){
17 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
18 CommandParameter pdesign("design", "InputTypes", "", "", "TreeDesign", "TreeDesign", "none",false,false); parameters.push_back(pdesign);
19 CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none",false,false); parameters.push_back(pshared);
20 CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none",false,false); parameters.push_back(prelabund);
21 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
23 CommandParameter ptree("tree", "InputTypes", "", "", "TreeDesign", "TreeDesign", "none",false,false); parameters.push_back(ptree);
24 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27 vector<string> myArray;
28 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
32 m->errorOut(e, "IndicatorCommand", "setParameters");
36 //**********************************************************************************************************************
37 string IndicatorCommand::getHelpString(){
39 string helpString = "";
40 helpString += "The indicator command reads a shared or relabund file and a tree or design file, and outputs a .indicator.tre and .indicator.summary file. \n";
41 helpString += "The new tree contains labels at each internal node. The label is the node number so you can relate the tree to the summary file.\n";
42 helpString += "The summary file lists the indicator value for each OTU for each node.\n";
43 helpString += "The indicator command parameters are tree, groups, shared, relabund, design and label. The tree parameter is required as well as either shared or relabund.\n";
44 helpString += "The design parameter allows you to relate the tree to the shared or relabund file, if your tree contains the grouping names, or if no tree is provided to group your groups into groupings.\n";
45 helpString += "The groups parameter allows you to specify which of the groups in your shared or relabund you would like analyzed, or if you provide a design file the groups in your design file. The groups may be entered separated by dashes.\n";
46 helpString += "The label parameter indicates at what distance your tree relates to the shared or relabund.\n";
47 helpString += "The iters parameter allows you to set number of randomization for the P value. The default is 1000.";
48 helpString += "The indicator command should be used in the following format: indicator(tree=test.tre, shared=test.shared, label=0.03)\n";
49 helpString += "Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n";
53 m->errorOut(e, "IndicatorCommand", "getHelpString");
58 //**********************************************************************************************************************
59 IndicatorCommand::IndicatorCommand(){
61 abort = true; calledHelp = true;
63 vector<string> tempOutNames;
64 outputTypes["tree"] = tempOutNames;
65 outputTypes["summary"] = tempOutNames;
68 m->errorOut(e, "IndicatorCommand", "IndicatorCommand");
72 //**********************************************************************************************************************
73 IndicatorCommand::IndicatorCommand(string option) {
75 abort = false; calledHelp = false;
77 //allow user to run help
78 if(option == "help") { help(); abort = true; calledHelp = true; }
79 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
82 vector<string> myArray = setParameters();
84 OptionParser parser(option);
85 map<string, string> parameters = parser.getParameters();
87 ValidParameters validParameter;
88 map<string, string>::iterator it;
90 //check to make sure all parameters are valid for command
91 for (it = parameters.begin(); it != parameters.end(); it++) {
92 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
97 m->namesOfGroups.clear();
101 vector<string> tempOutNames;
102 outputTypes["tree"] = tempOutNames;
103 outputTypes["summary"] = tempOutNames;
105 //if the user changes the input directory command factory will send this info to us in the output parameter
106 string inputDir = validParameter.validFile(parameters, "inputdir", false);
107 if (inputDir == "not found"){ inputDir = ""; }
110 it = parameters.find("tree");
111 //user has given a template file
112 if(it != parameters.end()){
113 path = m->hasPath(it->second);
114 //if the user has not given a path then, add inputdir. else leave path alone.
115 if (path == "") { parameters["tree"] = inputDir + it->second; }
118 it = parameters.find("shared");
119 //user has given a template file
120 if(it != parameters.end()){
121 path = m->hasPath(it->second);
122 //if the user has not given a path then, add inputdir. else leave path alone.
123 if (path == "") { parameters["shared"] = inputDir + it->second; }
126 it = parameters.find("relabund");
127 //user has given a template file
128 if(it != parameters.end()){
129 path = m->hasPath(it->second);
130 //if the user has not given a path then, add inputdir. else leave path alone.
131 if (path == "") { parameters["relabund"] = inputDir + it->second; }
134 it = parameters.find("design");
135 //user has given a template file
136 if(it != parameters.end()){
137 path = m->hasPath(it->second);
138 //if the user has not given a path then, add inputdir. else leave path alone.
139 if (path == "") { parameters["design"] = inputDir + it->second; }
143 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
145 //check for required parameters
146 treefile = validParameter.validFile(parameters, "tree", true);
147 if (treefile == "not open") { treefile = ""; abort = true; }
148 else if (treefile == "not found") { treefile = ""; }
149 else { m->setTreeFile(treefile); }
151 sharedfile = validParameter.validFile(parameters, "shared", true);
152 if (sharedfile == "not open") { abort = true; }
153 else if (sharedfile == "not found") { sharedfile = ""; }
154 else { inputFileName = sharedfile; m->setSharedFile(sharedfile); }
156 relabundfile = validParameter.validFile(parameters, "relabund", true);
157 if (relabundfile == "not open") { abort = true; }
158 else if (relabundfile == "not found") { relabundfile = ""; }
159 else { inputFileName = relabundfile; m->setRelAbundFile(relabundfile); }
161 designfile = validParameter.validFile(parameters, "design", true);
162 if (designfile == "not open") { designfile = ""; abort = true; }
163 else if (designfile == "not found") { designfile = ""; }
164 else { m->setDesignFile(designfile); }
166 groups = validParameter.validFile(parameters, "groups", false);
167 if (groups == "not found") { groups = ""; Groups.push_back("all"); }
168 else { m->splitAtDash(groups, Groups); }
171 label = validParameter.validFile(parameters, "label", false);
172 if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
174 string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
175 convert(temp, iters);
177 if ((relabundfile == "") && (sharedfile == "")) {
178 //is there are current file available for either of these?
179 //give priority to shared, then relabund
180 sharedfile = m->getSharedFile();
181 if (sharedfile != "") { inputFileName = sharedfile; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
183 relabundfile = m->getRelAbundFile();
184 if (relabundfile != "") { inputFileName = relabundfile; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
186 m->mothurOut("No valid current files. You must provide a shared or relabund."); m->mothurOutEndLine();
193 if ((designfile == "") && (treefile == "")) {
194 treefile = m->getTreeFile();
195 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
197 designfile = m->getDesignFile();
198 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
200 m->mothurOut("[ERROR]: You must provide either a tree or design file."); m->mothurOutEndLine(); abort = true;
205 if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("[ERROR]: You may not use both a shared and relabund file."); m->mothurOutEndLine(); abort = true; }
209 catch(exception& e) {
210 m->errorOut(e, "IndicatorCommand", "IndicatorCommand");
214 //**********************************************************************************************************************
216 int IndicatorCommand::execute(){
219 if (abort == true) { if (calledHelp) { return 0; } return 2; }
221 cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
223 //read designfile if given and set up globaldatas groups for read of sharedfiles
224 if (designfile != "") {
225 designMap = new GroupMap(designfile);
226 designMap->readDesignMap();
228 //fill Groups - checks for "all" and for any typo groups
229 SharedUtil* util = new SharedUtil();
230 util->setGroups(Groups, designMap->namesOfGroups);
233 //loop through the Groups and fill Globaldata's Groups with the design file info
234 m->Groups = designMap->getNamesSeqs(Groups);
237 /***************************************************/
238 // use smart distancing to get right sharedRabund //
239 /***************************************************/
240 if (sharedfile != "") {
242 if (m->control_pressed) { if (designfile != "") { delete designMap; } for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
243 if (lookup[0] == NULL) { m->mothurOut("[ERROR] reading shared file."); m->mothurOutEndLine(); return 0; }
246 if (m->control_pressed) { if (designfile != "") { delete designMap; } for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } return 0; }
247 if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; }
250 //reset Globaldatas groups if needed
251 if (designfile != "") { m->Groups = Groups; }
253 /***************************************************/
254 // reading tree info //
255 /***************************************************/
256 if (treefile != "") {
257 string groupfile = "";
258 m->setTreeFile(treefile);
259 Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
260 treeMap = new TreeMap();
261 bool mismatch = false;
263 for (int i = 0; i < m->Treenames.size(); i++) {
264 //sanity check - is this a group that is not in the sharedfile?
265 if (designfile == "") {
266 if (!(m->inUsersGroups(m->Treenames[i], m->namesOfGroups))) {
267 m->mothurOut("[ERROR]: " + m->Treenames[i] + " is not a group in your shared or relabund file."); m->mothurOutEndLine();
270 treeMap->addSeq(m->Treenames[i], "Group1");
272 vector<string> myGroups; myGroups.push_back(m->Treenames[i]);
273 vector<string> myNames = designMap->getNamesSeqs(myGroups);
275 for(int k = 0; k < myNames.size(); k++) {
276 if (!(m->inUsersGroups(myNames[k], m->namesOfGroups))) {
277 m->mothurOut("[ERROR]: " + myNames[k] + " is not a group in your shared or relabund file."); m->mothurOutEndLine();
281 treeMap->addSeq(m->Treenames[i], "Group1");
285 if ((designfile != "") && (m->Treenames.size() != Groups.size())) { cout << Groups.size() << '\t' << m->Treenames.size() << endl; m->mothurOut("[ERROR]: You design file does not match your tree, aborting."); m->mothurOutEndLine(); mismatch = true; }
287 if (mismatch) { //cleanup and exit
288 if (designfile != "") { delete designMap; }
289 if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } }
290 else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } }
295 read = new ReadNewickTree(treefile);
296 int readOk = read->read(treeMap);
298 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete treeMap; delete read; return 0; }
300 vector<Tree*> T = read->getTrees();
304 if (m->control_pressed) {
305 if (designfile != "") { delete designMap; }
306 if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } }
307 else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } }
308 for (int i = 0; i < T.size(); i++) { delete T[i]; } delete treeMap; return 0;
311 T[0]->assembleTree();
313 /***************************************************/
314 // create ouptut tree - respecting pickedGroups //
315 /***************************************************/
316 Tree* outputTree = new Tree(m->Groups.size(), treeMap);
318 outputTree->getSubTree(T[0], m->Groups);
319 outputTree->assembleTree();
321 //no longer need original tree, we have output tree to use and label
322 for (int i = 0; i < T.size(); i++) { delete T[i]; }
325 if (m->control_pressed) {
326 if (designfile != "") { delete designMap; }
327 if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } }
328 else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } }
329 delete outputTree; delete treeMap; return 0;
332 /***************************************************/
333 // get indicator species values //
334 /***************************************************/
335 GetIndicatorSpecies(outputTree);
336 delete outputTree; delete treeMap;
338 }else { //run with design file only
339 //get indicator species
340 GetIndicatorSpecies();
343 if (designfile != "") { delete designMap; }
344 if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } }
345 else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } }
347 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
349 //set tree file as new current treefile
350 if (treefile != "") {
352 itTypes = outputTypes.find("tree");
353 if (itTypes != outputTypes.end()) {
354 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTreeFile(current); }
357 m->mothurOutEndLine();
358 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
359 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
360 m->mothurOutEndLine();
364 catch(exception& e) {
365 m->errorOut(e, "IndicatorCommand", "execute");
369 //**********************************************************************************************************************
370 //divide shared or relabund file by groupings in the design file
371 //report all otu values to file
372 int IndicatorCommand::GetIndicatorSpecies(){
374 string thisOutputDir = outputDir;
375 if (outputDir == "") { thisOutputDir += m->hasPath(inputFileName); }
376 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)) + "indicator.summary";
377 outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
380 m->openOutputFile(outputFileName, out);
381 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
382 m->mothurOutEndLine(); m->mothurOut("Species\tIndicatorValue\tpValue\n");
385 if (sharedfile != "") { numBins = lookup[0]->getNumBins(); }
386 else { numBins = lookupFloat[0]->getNumBins(); }
388 if (m->control_pressed) { out.close(); return 0; }
390 /*****************************************************/
391 //create vectors containing rabund info //
392 /*****************************************************/
394 vector<float> indicatorValues; //size of numBins
395 vector<float> pValues;
396 map< vector<int>, vector<int> > randomGroupingsMap; //maps location in groupings to location in groupings, ie, [0][0] -> [1][2]. This is so we don't have to actually move the sharedRabundVectors.
398 if (sharedfile != "") {
399 vector< vector<SharedRAbundVector*> > groupings;
400 set<string> groupsAlreadyAdded;
401 vector<SharedRAbundVector*> subset;
404 for (int i = 0; i < designMap->namesOfGroups.size(); i++) {
406 for (int k = 0; k < lookup.size(); k++) {
407 //are you from this grouping?
408 if (designMap->getGroup(lookup[k]->getGroup()) == designMap->namesOfGroups[i]) {
409 subset.push_back(lookup[k]);
410 groupsAlreadyAdded.insert(lookup[k]->getGroup());
413 if (subset.size() != 0) { groupings.push_back(subset); }
417 if (groupsAlreadyAdded.size() != lookup.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); }
419 indicatorValues = getValues(groupings, randomGroupingsMap);
421 pValues.resize(indicatorValues.size(), 0);
422 for(int i=0;i<iters;i++){
423 if (m->control_pressed) { break; }
424 randomGroupingsMap = randomizeGroupings(groupings, lookup.size());
425 vector<float> randomIndicatorValues = getValues(groupings, randomGroupingsMap);
427 for (int j = 0; j < indicatorValues.size(); j++) {
428 if (randomIndicatorValues[j] >= indicatorValues[j]) { pValues[j]++; }
432 for (int i = 0; i < pValues.size(); i++) { pValues[i] /= (double)iters; }
435 vector< vector<SharedRAbundFloatVector*> > groupings;
436 set<string> groupsAlreadyAdded;
437 vector<SharedRAbundFloatVector*> subset;
440 for (int i = 0; i < designMap->namesOfGroups.size(); i++) {
441 for (int k = 0; k < lookupFloat.size(); k++) {
442 //are you from this grouping?
443 if (designMap->getGroup(lookupFloat[k]->getGroup()) == designMap->namesOfGroups[i]) {
444 subset.push_back(lookupFloat[k]);
445 groupsAlreadyAdded.insert(lookupFloat[k]->getGroup());
448 if (subset.size() != 0) { groupings.push_back(subset); }
452 if (groupsAlreadyAdded.size() != lookupFloat.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); }
454 indicatorValues = getValues(groupings, randomGroupingsMap);
456 pValues.resize(indicatorValues.size(), 0);
457 for(int i=0;i<iters;i++){
458 if (m->control_pressed) { break; }
459 randomGroupingsMap = randomizeGroupings(groupings, lookupFloat.size());
460 vector<float> randomIndicatorValues = getValues(groupings, randomGroupingsMap);
462 for (int j = 0; j < indicatorValues.size(); j++) {
463 if (randomIndicatorValues[j] >= indicatorValues[j]) { pValues[j]++; }
467 for (int i = 0; i < pValues.size(); i++) { pValues[i] /= (double)iters; }
470 if (m->control_pressed) { out.close(); return 0; }
473 /******************************************************/
474 //output indicator values to table form //
475 /*****************************************************/
476 out << "OTU\tIndicator_Value\tpValue" << endl;
477 for (int j = 0; j < indicatorValues.size(); j++) {
479 if (m->control_pressed) { out.close(); return 0; }
481 out << (j+1) << '\t' << indicatorValues[j] << '\t';
483 if (pValues[j] > (1/(float)iters)) { out << pValues[j] << endl; }
484 else { out << "<" << (1/(float)iters) << endl; }
486 if (pValues[j] <= 0.05) {
487 cout << "OTU" << j+1 << '\t' << indicatorValues[j] << '\t';
488 string pValueString = "<" + toString((1/(float)iters));
489 if (pValues[j] > (1/(float)iters)) { pValueString = toString(pValues[j]); cout << pValues[j];}
490 else { cout << "<" << (1/(float)iters); }
491 m->mothurOutJustToLog("OTU" + toString(j+1) + "\t" + toString(indicatorValues[j]) + "\t" + pValueString);
492 m->mothurOutEndLine();
500 catch(exception& e) {
501 m->errorOut(e, "IndicatorCommand", "GetIndicatorSpecies");
505 //**********************************************************************************************************************
506 //traverse tree finding indicator species values for each otu at each node
507 //label node with otu number that has highest indicator value
508 //report all otu values to file
509 int IndicatorCommand::GetIndicatorSpecies(Tree*& T){
512 string thisOutputDir = outputDir;
513 if (outputDir == "") { thisOutputDir += m->hasPath(inputFileName); }
514 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)) + "indicator.summary";
515 outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
518 m->openOutputFile(outputFileName, out);
519 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
522 if (sharedfile != "") { numBins = lookup[0]->getNumBins(); }
523 else { numBins = lookupFloat[0]->getNumBins(); }
527 for (int i = 0; i < numBins; i++) { out << "OTU" << (i+1) << "_IndValue" << '\t' << "pValue" << '\t'; }
530 m->mothurOutEndLine(); m->mothurOut("Node\tSpecies\tIndicatorValue\tpValue\n");
532 string treeOutputDir = outputDir;
533 if (outputDir == "") { treeOutputDir += m->hasPath(treefile); }
534 string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + "indicator.tre";
537 //create a map from tree node index to names of descendants, save time later to know which sharedRabund you need
538 map<int, set<string> > nodeToDescendants;
539 map<int, set<int> > descendantNodes;
540 for (int i = 0; i < T->getNumNodes(); i++) {
541 if (m->control_pressed) { return 0; }
543 nodeToDescendants[i] = getDescendantList(T, i, nodeToDescendants, descendantNodes);
546 //you need the distances to leaf to decide grouping below
547 //this will also set branch lengths if the tree does not include them
548 map<int, float> distToRoot = getDistToRoot(T);
551 for (int i = T->getNumLeaves(); i < T->getNumNodes(); i++) {
552 //cout << endl << i+1 << endl;
553 if (m->control_pressed) { out.close(); return 0; }
555 /*****************************************************/
556 //create vectors containing rabund info //
557 /*****************************************************/
559 vector<float> indicatorValues; //size of numBins
560 vector<float> pValues;
561 map< vector<int>, vector<int> > randomGroupingsMap; //maps location in groupings to location in groupings, ie, [0][0] -> [1][2]. This is so we don't have to actually move the sharedRabundVectors.
563 if (sharedfile != "") {
564 vector< vector<SharedRAbundVector*> > groupings;
566 //get nodes that will be a valid grouping
567 //you are valid if you are not one of my descendants
568 //AND your distToRoot is >= mine
569 //AND you were not added as part of a larger grouping. Largest nodes are added first.
571 set<string> groupsAlreadyAdded;
572 //create a grouping with my grouping
573 vector<SharedRAbundVector*> subset;
575 int doneCount = nodeToDescendants[i].size();
576 for (int k = 0; k < lookup.size(); k++) {
577 //is this descendant of i
578 if ((nodeToDescendants[i].count(lookup[k]->getGroup()) != 0)) {
579 subset.push_back(lookup[k]);
580 groupsAlreadyAdded.insert(lookup[k]->getGroup());
583 if (count == doneCount) { break; } //quit once you get the rabunds for this grouping
585 if (subset.size() != 0) { groupings.push_back(subset); }
588 for (int j = (T->getNumNodes()-1); j >= 0; j--) {
591 if ((descendantNodes[i].count(j) == 0) && (distToRoot[j] >= distToRoot[i])) {
592 vector<SharedRAbundVector*> subset;
594 int doneCount = nodeToDescendants[j].size();
595 for (int k = 0; k < lookup.size(); k++) {
596 //is this descendant of j, and we didn't already add this as part of a larger grouping
597 if ((nodeToDescendants[j].count(lookup[k]->getGroup()) != 0) && (groupsAlreadyAdded.count(lookup[k]->getGroup()) == 0)) {
598 subset.push_back(lookup[k]);
599 groupsAlreadyAdded.insert(lookup[k]->getGroup());
602 if (count == doneCount) { break; } //quit once you get the rabunds for this grouping
605 //if subset.size == 0 then the node was added as part of a larger grouping
606 if (subset.size() != 0) { groupings.push_back(subset); }
610 if (groupsAlreadyAdded.size() != lookup.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); }
612 indicatorValues = getValues(groupings, randomGroupingsMap);
614 pValues.resize(indicatorValues.size(), 0);
615 for(int i=0;i<iters;i++){
616 if (m->control_pressed) { break; }
617 randomGroupingsMap = randomizeGroupings(groupings, lookup.size());
618 vector<float> randomIndicatorValues = getValues(groupings, randomGroupingsMap);
620 for (int j = 0; j < indicatorValues.size(); j++) {
621 if (randomIndicatorValues[j] >= indicatorValues[j]) { pValues[j]++; }
625 for (int i = 0; i < pValues.size(); i++) { pValues[i] /= (double)iters; }
628 vector< vector<SharedRAbundFloatVector*> > groupings;
630 //get nodes that will be a valid grouping
631 //you are valid if you are not one of my descendants
632 //AND your distToRoot is >= mine
633 //AND you were not added as part of a larger grouping. Largest nodes are added first.
635 set<string> groupsAlreadyAdded;
636 //create a grouping with my grouping
637 vector<SharedRAbundFloatVector*> subset;
639 int doneCount = nodeToDescendants[i].size();
640 for (int k = 0; k < lookupFloat.size(); k++) {
641 //is this descendant of i
642 if ((nodeToDescendants[i].count(lookupFloat[k]->getGroup()) != 0)) {
643 subset.push_back(lookupFloat[k]);
644 groupsAlreadyAdded.insert(lookupFloat[k]->getGroup());
647 if (count == doneCount) { break; } //quit once you get the rabunds for this grouping
649 if (subset.size() != 0) { groupings.push_back(subset); }
651 for (int j = (T->getNumNodes()-1); j >= 0; j--) {
652 if ((descendantNodes[i].count(j) == 0) && (distToRoot[j] >= distToRoot[i])) {
653 vector<SharedRAbundFloatVector*> subset;
655 int doneCount = nodeToDescendants[j].size();
656 for (int k = 0; k < lookupFloat.size(); k++) {
657 //is this descendant of j, and we didn't already add this as part of a larger grouping
658 if ((nodeToDescendants[j].count(lookupFloat[k]->getGroup()) != 0) && (groupsAlreadyAdded.count(lookupFloat[k]->getGroup()) == 0)) {
659 subset.push_back(lookupFloat[k]);
660 groupsAlreadyAdded.insert(lookupFloat[k]->getGroup());
663 if (count == doneCount) { break; } //quit once you get the rabunds for this grouping
666 //if subset.size == 0 then the node was added as part of a larger grouping
667 if (subset.size() != 0) { groupings.push_back(subset); }
671 if (groupsAlreadyAdded.size() != lookupFloat.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); }
673 indicatorValues = getValues(groupings, randomGroupingsMap);
675 pValues.resize(indicatorValues.size(), 0);
676 for(int i=0;i<iters;i++){
678 if (m->control_pressed) { break; }
679 randomGroupingsMap = randomizeGroupings(groupings, lookup.size());
680 vector<float> randomIndicatorValues = getValues(groupings, randomGroupingsMap);
682 for (int j = 0; j < indicatorValues.size(); j++) {
683 if (randomIndicatorValues[j] >= indicatorValues[j]) { pValues[j]++; }
687 for (int i = 0; i < pValues.size(); i++) { pValues[i] /= (double)iters; }
690 if (m->control_pressed) { out.close(); return 0; }
693 /******************************************************/
694 //output indicator values to table form + label tree //
695 /*****************************************************/
696 out << (i+1) << '\t';
697 for (int j = 0; j < indicatorValues.size(); j++) {
699 if (m->control_pressed) { out.close(); return 0; }
701 if (pValues[j] < (1/(float)iters)) {
702 out << indicatorValues[j] << '\t' << '<' << (1/(float)iters) << '\t';
704 out << indicatorValues[j] << '\t' << pValues[j] << '\t';
707 if (pValues[j] <= 0.05) {
708 cout << i+1 << "\tOTU" << j+1 << '\t' << indicatorValues[j] << '\t';
709 string pValueString = "<" + toString((1/(float)iters));
710 if (pValues[j] > (1/(float)iters)) { pValueString = toString(pValues[j]); cout << pValues[j];}
711 else { cout << "<" << (1/(float)iters); }
712 m->mothurOutJustToLog(toString(i) + "\tOTU" + toString(j+1) + "\t" + toString(indicatorValues[j]) + "\t" + pValueString);
713 m->mothurOutEndLine();
718 T->tree[i].setLabel((i+1));
723 m->openOutputFile(outputTreeFileName, outTree);
724 outputNames.push_back(outputTreeFileName); outputTypes["tree"].push_back(outputTreeFileName);
726 T->print(outTree, "both");
731 catch(exception& e) {
732 m->errorOut(e, "IndicatorCommand", "GetIndicatorSpecies");
736 //**********************************************************************************************************************
737 vector<float> IndicatorCommand::getValues(vector< vector<SharedRAbundFloatVector*> >& groupings, map< vector<int>, vector<int> > groupingsMap){
739 vector<float> values;
740 map< vector<int>, vector<int> >::iterator it;
743 for (int i = 0; i < groupings[0][0]->getNumBins(); i++) {
745 if (m->control_pressed) { return values; }
748 float AijDenominator = 0.0;
751 //get overall abundance of each grouping
752 for (int j = 0; j < groupings.size(); j++) {
754 float totalAbund = 0;
756 for (int k = 0; k < groupings[j].size(); k++) {
757 vector<int> temp; temp.push_back(j); temp.push_back(k);
758 it = groupingsMap.find(temp);
760 if (it == groupingsMap.end()) { //this one didnt get moved
761 totalAbund += groupings[j][k]->getAbundance(i);
762 if (groupings[j][k]->getAbundance(i) != 0.0) { numNotZero++; }
764 totalAbund += groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i);
765 if (groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i) != 0.0) { numNotZero++; }
771 float Aij = (totalAbund / (float) groupings[j].size());
772 terms.push_back(Aij);
774 //percentage of sites represented
775 Bij.push_back(numNotZero / (float) groupings[j].size());
777 AijDenominator += Aij;
780 float maxIndVal = 0.0;
781 for (int j = 0; j < terms.size(); j++) {
782 float thisAij = (terms[j] / AijDenominator); //relative abundance
783 float thisValue = thisAij * Bij[j] * 100.0;
786 if (thisValue > maxIndVal) { maxIndVal = thisValue; }
789 values.push_back(maxIndVal);
794 catch(exception& e) {
795 m->errorOut(e, "IndicatorCommand", "getValues");
799 //**********************************************************************************************************************
800 //same as above, just data type difference
801 vector<float> IndicatorCommand::getValues(vector< vector<SharedRAbundVector*> >& groupings, map< vector<int>, vector<int> > groupingsMap){
803 vector<float> values;
805 /*for (int j = 0; j < groupings.size(); j++) {
806 cout << "grouping " << j << endl;
807 for (int k = 0; k < groupings[j].size(); k++) {
808 cout << groupings[j][k]->getGroup() << endl;
811 map< vector<int>, vector<int> >::iterator it;
814 for (int i = 0; i < groupings[0][0]->getNumBins(); i++) {
816 float AijDenominator = 0.0;
818 //get overall abundance of each grouping
819 for (int j = 0; j < groupings.size(); j++) {
821 int totalAbund = 0.0;
823 for (int k = 0; k < groupings[j].size(); k++) {
824 vector<int> temp; temp.push_back(j); temp.push_back(k);
825 it = groupingsMap.find(temp);
827 if (it == groupingsMap.end()) { //this one didnt get moved
828 totalAbund += groupings[j][k]->getAbundance(i);
829 if (groupings[j][k]->getAbundance(i) != 0.0) { numNotZero++; }
831 totalAbund += groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i);
832 if (groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i) != 0.0) { numNotZero++; }
838 float Aij = (totalAbund / (float) groupings[j].size());
839 terms.push_back(Aij);
841 //percentage of sites represented
842 Bij.push_back(numNotZero / (float) groupings[j].size());
844 AijDenominator += Aij;
847 float maxIndVal = 0.0;
848 for (int j = 0; j < terms.size(); j++) {
849 float thisAij = (terms[j] / AijDenominator); //relative abundance
850 float thisValue = thisAij * Bij[j] * 100.0;
853 if (thisValue > maxIndVal) { maxIndVal = thisValue; }
856 values.push_back(maxIndVal);
861 catch(exception& e) {
862 m->errorOut(e, "IndicatorCommand", "getValues");
866 //**********************************************************************************************************************
867 //you need the distances to root to decide groupings
868 //this will also set branch lengths if the tree does not include them
869 map<int, float> IndicatorCommand::getDistToRoot(Tree*& T){
871 map<int, float> dists;
873 bool hasBranchLengths = false;
874 for (int i = 0; i < T->getNumNodes(); i++) {
875 if (T->tree[i].getBranchLength() > 0.0) { hasBranchLengths = true; break; }
878 //set branchlengths if needed
879 if (!hasBranchLengths) {
880 for (int i = 0; i < T->getNumNodes(); i++) {
882 int lc = T->tree[i].getLChild();
883 int rc = T->tree[i].getRChild();
885 if (lc == -1) { // you are a leaf
886 //if you are a leaf set you priliminary length to 1.0, this may adjust later
887 T->tree[i].setBranchLength(1.0);
889 }else{ // you are an internal node
890 //look at your children's length to leaf
891 float ldist = dists[lc];
892 float rdist = dists[rc];
894 float greater = ldist;
895 if (rdist > greater) { greater = rdist; dists[i] = ldist + 1.0;}
896 else { dists[i] = rdist + 1.0; }
899 //branch length = difference + 1
900 T->tree[lc].setBranchLength((abs(ldist-greater) + 1.0));
901 T->tree[rc].setBranchLength((abs(rdist-greater) + 1.0));
908 for (int i = 0; i < T->getNumNodes(); i++) {
913 while(T->tree[index].getParent() != -1){
914 if (T->tree[index].getBranchLength() != -1) {
915 sum += abs(T->tree[index].getBranchLength());
917 index = T->tree[index].getParent();
925 catch(exception& e) {
926 m->errorOut(e, "IndicatorCommand", "getLengthToLeaf");
930 //**********************************************************************************************************************
931 set<string> IndicatorCommand::getDescendantList(Tree*& T, int i, map<int, set<string> > descendants, map<int, set<int> >& nodes){
935 set<string>::iterator it;
937 int lc = T->tree[i].getLChild();
938 int rc = T->tree[i].getRChild();
940 if (lc == -1) { //you are a leaf your only descendant is yourself
941 set<int> temp; temp.insert(i);
944 if (designfile == "") {
945 names.insert(T->tree[i].getName());
947 vector<string> myGroup; myGroup.push_back(T->tree[i].getName());
948 vector<string> myReps = designMap->getNamesSeqs(myGroup);
949 for (int k = 0; k < myReps.size(); k++) {
950 names.insert(myReps[k]);
954 }else{ //your descedants are the combination of your childrens descendants
955 names = descendants[lc];
956 nodes[i] = nodes[lc];
957 for (it = descendants[rc].begin(); it != descendants[rc].end(); it++) {
960 for (set<int>::iterator itNum = nodes[rc].begin(); itNum != nodes[rc].end(); itNum++) {
961 nodes[i].insert(*itNum);
963 //you are your own descendant
969 catch(exception& e) {
970 m->errorOut(e, "IndicatorCommand", "getDescendantList");
974 //**********************************************************************************************************************
975 int IndicatorCommand::getShared(){
977 InputData* input = new InputData(sharedfile, "sharedfile");
978 lookup = input->getSharedRAbundVectors();
979 string lastLabel = lookup[0]->getLabel();
981 if (label == "") { label = lastLabel; delete input; return 0; }
983 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
984 set<string> labels; labels.insert(label);
985 set<string> processedLabels;
986 set<string> userLabels = labels;
988 //as long as you are not at the end of the file or done wih the lines you want
989 while((lookup[0] != NULL) && (userLabels.size() != 0)) {
990 if (m->control_pressed) { delete input; return 0; }
992 if(labels.count(lookup[0]->getLabel()) == 1){
993 processedLabels.insert(lookup[0]->getLabel());
994 userLabels.erase(lookup[0]->getLabel());
998 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
999 string saveLabel = lookup[0]->getLabel();
1001 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
1002 lookup = input->getSharedRAbundVectors(lastLabel);
1004 processedLabels.insert(lookup[0]->getLabel());
1005 userLabels.erase(lookup[0]->getLabel());
1007 //restore real lastlabel to save below
1008 lookup[0]->setLabel(saveLabel);
1012 lastLabel = lookup[0]->getLabel();
1014 //get next line to process
1015 //prevent memory leak
1016 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
1017 lookup = input->getSharedRAbundVectors();
1021 if (m->control_pressed) { delete input; return 0; }
1023 //output error messages about any remaining user labels
1024 set<string>::iterator it;
1025 bool needToRun = false;
1026 for (it = userLabels.begin(); it != userLabels.end(); it++) {
1027 m->mothurOut("Your file does not include the label " + *it);
1028 if (processedLabels.count(lastLabel) != 1) {
1029 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
1032 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
1036 //run last label if you need to
1037 if (needToRun == true) {
1038 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
1039 lookup = input->getSharedRAbundVectors(lastLabel);
1045 catch(exception& e) {
1046 m->errorOut(e, "IndicatorCommand", "getShared");
1050 //**********************************************************************************************************************
1051 int IndicatorCommand::getSharedFloat(){
1053 InputData* input = new InputData(relabundfile, "relabund");
1054 lookupFloat = input->getSharedRAbundFloatVectors();
1055 string lastLabel = lookupFloat[0]->getLabel();
1057 if (label == "") { label = lastLabel; delete input; return 0; }
1059 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
1060 set<string> labels; labels.insert(label);
1061 set<string> processedLabels;
1062 set<string> userLabels = labels;
1064 //as long as you are not at the end of the file or done wih the lines you want
1065 while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
1067 if (m->control_pressed) { delete input; return 0; }
1069 if(labels.count(lookupFloat[0]->getLabel()) == 1){
1070 processedLabels.insert(lookupFloat[0]->getLabel());
1071 userLabels.erase(lookupFloat[0]->getLabel());
1075 if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
1076 string saveLabel = lookupFloat[0]->getLabel();
1078 for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; }
1079 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
1081 processedLabels.insert(lookupFloat[0]->getLabel());
1082 userLabels.erase(lookupFloat[0]->getLabel());
1084 //restore real lastlabel to save below
1085 lookupFloat[0]->setLabel(saveLabel);
1089 lastLabel = lookupFloat[0]->getLabel();
1091 //get next line to process
1092 //prevent memory leak
1093 for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; }
1094 lookupFloat = input->getSharedRAbundFloatVectors();
1098 if (m->control_pressed) { delete input; return 0; }
1100 //output error messages about any remaining user labels
1101 set<string>::iterator it;
1102 bool needToRun = false;
1103 for (it = userLabels.begin(); it != userLabels.end(); it++) {
1104 m->mothurOut("Your file does not include the label " + *it);
1105 if (processedLabels.count(lastLabel) != 1) {
1106 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
1109 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
1113 //run last label if you need to
1114 if (needToRun == true) {
1115 for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i] != NULL) { delete lookupFloat[i]; } }
1116 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
1122 catch(exception& e) {
1123 m->errorOut(e, "IndicatorCommand", "getShared");
1127 //**********************************************************************************************************************
1128 //swap groups between groupings, in essence randomizing the second column of the design file
1129 map< vector<int>, vector<int> > IndicatorCommand::randomizeGroupings(vector< vector<SharedRAbundVector*> >& groupings, int numLookupGroups){
1132 map< vector<int>, vector<int> > randomGroupings;
1134 for (int i = 0; i < numLookupGroups; i++) {
1135 if (m->control_pressed) {break;}
1137 //get random groups to swap to switch with
1138 //generate random int between 0 and groupings.size()-1
1139 int z = m->getRandomIndex(groupings.size()-1);
1140 int x = m->getRandomIndex(groupings.size()-1);
1141 int a = m->getRandomIndex(groupings[z].size()-1);
1142 int b = m->getRandomIndex(groupings[x].size()-1);
1143 //cout << i << '\t' << z << '\t' << x << '\t' << a << '\t' << b << endl;
1144 //if ((z < 0) || (z > 1) || x<0 || x>1 || a <0 || a>groupings[z].size()-1 || b<0 || b>groupings[x].size()-1) { cout << "probelm" << i << '\t' << z << '\t' << x << '\t' << a << '\t' << b << endl; }
1149 from.push_back(z); from.push_back(a);
1150 to.push_back(x); to.push_back(b);
1152 randomGroupings[from] = to;
1154 //cout << "done" << endl;
1155 return randomGroupings;
1157 catch(exception& e) {
1158 m->errorOut(e, "IndicatorCommand", "randomizeGroupings");
1162 //**********************************************************************************************************************
1163 //swap groups between groupings, in essence randomizing the second column of the design file
1164 map< vector<int>, vector<int> > IndicatorCommand::randomizeGroupings(vector< vector<SharedRAbundFloatVector*> >& groupings, int numLookupGroups){
1167 map< vector<int>, vector<int> > randomGroupings;
1169 for (int i = 0; i < numLookupGroups; i++) {
1171 //get random groups to swap to switch with
1172 //generate random int between 0 and groupings.size()-1
1173 int z = m->getRandomIndex(groupings.size()-1);
1174 int x = m->getRandomIndex(groupings.size()-1);
1175 int a = m->getRandomIndex(groupings[z].size()-1);
1176 int b = m->getRandomIndex(groupings[x].size()-1);
1177 //cout << i << '\t' << z << '\t' << x << '\t' << a << '\t' << b << endl;
1182 from.push_back(z); from.push_back(a);
1183 to.push_back(x); to.push_back(b);
1185 randomGroupings[from] = to;
1188 return randomGroupings;
1190 catch(exception& e) {
1191 m->errorOut(e, "IndicatorCommand", "randomizeGroupings");
1196 /*****************************************************************/