]> git.donarmstrong.com Git - mothur.git/blob - indicatorcommand.cpp
changes while testing
[mothur.git] / indicatorcommand.cpp
1 /*
2  *  indicatorcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "indicatorcommand.h"
11 #include "sharedutilities.h"
12
13
14 //**********************************************************************************************************************
15 vector<string> IndicatorCommand::setParameters(){       
16         try {
17                 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
18                 CommandParameter pdesign("design", "InputTypes", "", "", "TreeDesign", "TreeDesign", "none","summary",false,false,true); parameters.push_back(pdesign);
19                 CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none","summary",false,false,true); parameters.push_back(pshared);   
20                 CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none","summary",false,false); parameters.push_back(prelabund);
21                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
22                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
23                 CommandParameter ptree("tree", "InputTypes", "", "", "TreeDesign", "TreeDesign", "none","tree-summary",false,false,true); parameters.push_back(ptree);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
26                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pprocessors);
27                 
28                 vector<string> myArray;
29                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
30                 return myArray;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "IndicatorCommand", "setParameters");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 string IndicatorCommand::getHelpString(){       
39         try {
40                 string helpString = "";
41                 helpString += "The indicator command can be run in 3 ways: with a shared or relabund file and a design file, or with a shared or relabund file and a tree file, or with a shared or relabund file, tree file and design file. \n";
42                 helpString += "The indicator command outputs a .indicator.summary file and a .indicator.tre if a tree is given. \n";
43                 helpString += "The new tree contains labels at each internal node.  The label is the node number so you can relate the tree to the summary file.\n";
44                 helpString += "The summary file lists the indicator value for each OTU for each node.\n";
45                 helpString += "The indicator command parameters are tree, groups, shared, relabund, design and label. \n";
46                 helpString += "The design parameter allows you to relate the tree to the shared or relabund file, if your tree contains the grouping names, or if no tree is provided to group your groups into groupings.\n";                  
47                 helpString += "The groups parameter allows you to specify which of the groups in your shared or relabund you would like analyzed, or if you provide a design file the groups in your design file.  The groups may be entered separated by dashes.\n";
48                 helpString += "The label parameter indicates at what distance your tree relates to the shared or relabund.\n";
49                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
50                 helpString += "The iters parameter allows you to set number of randomization for the P value.  The default is 1000.";
51                 helpString += "The indicator command should be used in the following format: indicator(tree=test.tre, shared=test.shared, label=0.03)\n";
52                 helpString += "Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n"; 
53                 return helpString;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "IndicatorCommand", "getHelpString");
57                 exit(1);
58         }
59 }
60 //**********************************************************************************************************************
61 string IndicatorCommand::getOutputPattern(string type) {
62     try {
63         string pattern = "";
64         
65         if (type == "tree") {  pattern = "[filename],indicator.tre"; } 
66         else if (type == "summary") {  pattern = "[filename],indicator.summary"; } 
67         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
68         
69         return pattern;
70     }
71     catch(exception& e) {
72         m->errorOut(e, "IndicatorCommand", "getOutputPattern");
73         exit(1);
74     }
75 }
76 //**********************************************************************************************************************
77 IndicatorCommand::IndicatorCommand(){   
78         try {
79                 abort = true; calledHelp = true; 
80                 setParameters();
81                 vector<string> tempOutNames;
82                 outputTypes["tree"] = tempOutNames;
83                 outputTypes["summary"] = tempOutNames;
84         }
85         catch(exception& e) {
86                 m->errorOut(e, "IndicatorCommand", "IndicatorCommand");
87                 exit(1);
88         }
89 }
90 //**********************************************************************************************************************
91 IndicatorCommand::IndicatorCommand(string option)  {
92         try {
93                 abort = false; calledHelp = false;   
94                 
95                 //allow user to run help
96                 if(option == "help") { help(); abort = true; calledHelp = true; }
97                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
98                 
99                 else {
100                         vector<string> myArray = setParameters();
101                         
102                         OptionParser parser(option);
103                         map<string, string> parameters = parser.getParameters();
104                         
105                         ValidParameters validParameter;
106                         map<string, string>::iterator it;
107                         
108                         //check to make sure all parameters are valid for command
109                         for (it = parameters.begin(); it != parameters.end(); it++) { 
110                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
111                         }
112                         
113                         m->runParse = true;
114                         m->clearGroups();
115                         m->clearAllGroups();
116                         m->Treenames.clear();
117                         
118                         vector<string> tempOutNames;
119                         outputTypes["tree"] = tempOutNames;
120                         outputTypes["summary"] = tempOutNames;
121                         
122                         //if the user changes the input directory command factory will send this info to us in the output parameter 
123                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
124                         if (inputDir == "not found"){   inputDir = "";          }
125                         else {
126                                 string path;
127                                 it = parameters.find("tree");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
133                                 }
134                                 
135                                 it = parameters.find("shared");
136                                 //user has given a template file
137                                 if(it != parameters.end()){ 
138                                         path = m->hasPath(it->second);
139                                         //if the user has not given a path then, add inputdir. else leave path alone.
140                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
141                                 }
142                                 
143                                 it = parameters.find("relabund");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
149                                 }
150                                 
151                                 it = parameters.find("design");
152                                 //user has given a template file
153                                 if(it != parameters.end()){ 
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
157                                 }
158                         }
159                         
160                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
161
162                         //check for required parameters
163                         treefile = validParameter.validFile(parameters, "tree", true);
164                         if (treefile == "not open") { treefile = ""; abort = true; }
165                         else if (treefile == "not found") { treefile = "";      }               
166                         else { m->setTreeFile(treefile); }      
167                         
168                         sharedfile = validParameter.validFile(parameters, "shared", true);
169                         if (sharedfile == "not open") { abort = true; }
170                         else if (sharedfile == "not found") { sharedfile = ""; }
171                         else { inputFileName = sharedfile; m->setSharedFile(sharedfile); }
172                         
173                         relabundfile = validParameter.validFile(parameters, "relabund", true);
174                         if (relabundfile == "not open") { abort = true; }
175                         else if (relabundfile == "not found") { relabundfile = ""; }
176                         else { inputFileName = relabundfile; m->setRelAbundFile(relabundfile); }
177                         
178                         designfile = validParameter.validFile(parameters, "design", true);
179                         if (designfile == "not open") { designfile = ""; abort = true; }
180                         else if (designfile == "not found") { designfile = ""; }
181                         else { m->setDesignFile(designfile); }
182                         
183                         groups = validParameter.validFile(parameters, "groups", false);                 
184                         if (groups == "not found") { groups = "";  Groups.push_back("all"); }
185                         else { m->splitAtDash(groups, Groups);  }                       
186                         m->setGroups(Groups);
187                         
188                         label = validParameter.validFile(parameters, "label", false);                   
189                         if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }  
190                         
191                         string temp = validParameter.validFile(parameters, "iters", false);             if (temp == "not found") { temp = "1000"; }
192                         m->mothurConvert(temp, iters); 
193                         
194                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
195                         m->setProcessors(temp);
196                         m->mothurConvert(temp, processors); 
197                         
198                         if ((relabundfile == "") && (sharedfile == "")) { 
199                                 //is there are current file available for either of these?
200                                 //give priority to shared, then relabund
201                                 sharedfile = m->getSharedFile(); 
202                                 if (sharedfile != "") {  inputFileName = sharedfile; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
203                                 else { 
204                                         relabundfile = m->getRelAbundFile(); 
205                                         if (relabundfile != "") {  inputFileName = relabundfile; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
206                                         else { 
207                                                 m->mothurOut("No valid current files. You must provide a shared or relabund."); m->mothurOutEndLine(); 
208                                                 abort = true;
209                                         }
210                                 }
211                         }
212                         
213                         
214                         if ((designfile == "") && (treefile == "")) { 
215                                 treefile = m->getTreeFile(); 
216                                 if (treefile != "") {  m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
217                                 else { 
218                                         designfile = m->getDesignFile(); 
219                                         if (designfile != "") {  m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
220                                         else { 
221                                                 m->mothurOut("[ERROR]: You must provide either a tree or design file."); m->mothurOutEndLine(); abort = true; 
222                                         }
223                                 }
224                         }
225                         
226                         if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("[ERROR]: You may not use both a shared and relabund file."); m->mothurOutEndLine(); abort = true;  }
227                         
228                         
229                 }
230         }
231         catch(exception& e) {
232                 m->errorOut(e, "IndicatorCommand", "IndicatorCommand");         
233                 exit(1);
234         }
235 }
236 //**********************************************************************************************************************
237
238 int IndicatorCommand::execute(){
239         try {
240                 
241                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
242                 
243                 cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
244                 
245                 int start = time(NULL);
246         
247                 //read designfile if given and set up groups for read of sharedfiles
248                 if (designfile != "") {
249                         designMap = new GroupMap(designfile);
250                         designMap->readDesignMap();
251                         
252                         //fill Groups - checks for "all" and for any typo groups
253                         SharedUtil util;
254                         vector<string> nameGroups = designMap->getNamesOfGroups();
255                         util.setGroups(Groups, nameGroups);
256                         designMap->setNamesOfGroups(nameGroups);
257                         
258                         vector<string> namesSeqs = designMap->getNamesSeqs(Groups);
259                         m->setGroups(namesSeqs);
260                 }
261         
262                 /***************************************************/
263                 // use smart distancing to get right sharedRabund  //
264                 /***************************************************/
265                 if (sharedfile != "") {  
266                         getShared(); 
267                         if (m->control_pressed) { if (designfile != "") { delete designMap; } for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } return 0; }
268                         if (lookup[0] == NULL) { m->mothurOut("[ERROR] reading shared file."); m->mothurOutEndLine(); return 0; }
269                 }else { 
270                         getSharedFloat(); 
271                         if (m->control_pressed) {  if (designfile != "") { delete designMap; } for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
272                         if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; }
273                 }
274                 
275                 //reset groups if needed
276                 if (designfile != "") { m->setGroups(Groups); }
277                         
278                 /***************************************************/
279                 //    reading tree info                                                    //
280                 /***************************************************/
281                 if (treefile != "") {
282                         string groupfile = ""; 
283                         m->setTreeFile(treefile);
284                         Tree* tree = new Tree(treefile); delete tree;  //extracts names from tree to make faked out groupmap
285                         ct = new CountTable();
286                         bool mismatch = false;
287             
288             set<string> nameMap;
289             map<string, string> groupMap;
290             set<string> gps;
291             for (int i = 0; i < m->Treenames.size(); i++) { 
292                 nameMap.insert(m->Treenames[i]); 
293                 //sanity check - is this a group that is not in the sharedfile?
294                 if (i == 0) { gps.insert("Group1"); }
295                                 if (designfile == "") {
296                                         if (!(m->inUsersGroups(m->Treenames[i], m->getAllGroups()))) {
297                                                 m->mothurOut("[ERROR]: " + m->Treenames[i] + " is not a group in your shared or relabund file."); m->mothurOutEndLine();
298                                                 mismatch = true;
299                                         }
300                                         groupMap[m->Treenames[i]] = "Group1"; 
301                                 }else{
302                                         vector<string> myGroups; myGroups.push_back(m->Treenames[i]);
303                                         vector<string> myNames = designMap->getNamesSeqs(myGroups);
304                                         
305                                         for(int k = 0; k < myNames.size(); k++) {
306                                                 if (!(m->inUsersGroups(myNames[k], m->getAllGroups()))) {
307                                                         m->mothurOut("[ERROR]: " + myNames[k] + " is not a group in your shared or relabund file."); m->mothurOutEndLine();
308                                                         mismatch = true;
309                                                 }
310                                         }
311                                         groupMap[m->Treenames[i]] = "Group1";
312                                 }
313             }
314             ct->createTable(nameMap, groupMap, gps);
315                         
316                         if ((designfile != "") && (m->Treenames.size() != Groups.size())) { cout << Groups.size() << '\t' << m->Treenames.size() << endl; m->mothurOut("[ERROR]: You design file does not match your tree, aborting."); m->mothurOutEndLine(); mismatch = true; }
317                                         
318                         if (mismatch) { //cleanup and exit
319                                 if (designfile != "") { delete designMap; }
320                                 if (sharedfile != "") {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } }
321                                 else { for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } }
322                                 delete ct;
323                                 return 0;
324                         }
325                  
326                         read = new ReadNewickTree(treefile);
327                         int readOk = read->read(ct); 
328                         
329                         if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete ct; delete read; return 0; }
330                         
331                         vector<Tree*> T = read->getTrees();
332                         
333                         delete read;
334                         
335                         if (m->control_pressed) { 
336                                 if (designfile != "") { delete designMap; }
337                                 if (sharedfile != "") {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } }
338                                 else { for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } }
339                                 for (int i = 0; i < T.size(); i++) {  delete T[i];  }  delete ct; return 0; 
340                         }
341             
342                         T[0]->assembleTree();
343                                         
344                         /***************************************************/
345                         //    create ouptut tree - respecting pickedGroups //
346                         /***************************************************/
347                         Tree* outputTree = new Tree(m->getNumGroups(), ct); 
348                         
349                         outputTree->getSubTree(T[0], m->getGroups());
350                         outputTree->assembleTree();
351                                 
352                         //no longer need original tree, we have output tree to use and label
353                         for (int i = 0; i < T.size(); i++) {  delete T[i];  } 
354                         
355                         if (m->control_pressed) { 
356                                 if (designfile != "") { delete designMap; }
357                                 if (sharedfile != "") {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } }
358                                 else { for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } }
359                                 delete outputTree; delete ct;  return 0; 
360                         }
361                         
362                         /***************************************************/
363                         //              get indicator species values                       //
364                         /***************************************************/
365                         GetIndicatorSpecies(outputTree);
366                         delete outputTree; delete ct;
367                         
368                 }else { //run with design file only
369                         //get indicator species
370                         GetIndicatorSpecies();
371                 }
372                 
373                 if (designfile != "") { delete designMap; }
374                 if (sharedfile != "") {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } }
375                 else { for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } }
376                 
377                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } return 0; }
378                 
379                 //set tree file as new current treefile
380                 if (treefile != "") {
381                         string current = "";
382                         itTypes = outputTypes.find("tree");
383                         if (itTypes != outputTypes.end()) {
384                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTreeFile(current); }
385                         }
386                 }
387                 
388                 m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to find the indicator species."); m->mothurOutEndLine();
389                 m->mothurOutEndLine();
390                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
391                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
392                 m->mothurOutEndLine();
393                                 
394                 return 0;
395         }
396         catch(exception& e) {
397                 m->errorOut(e, "IndicatorCommand", "execute");  
398                 exit(1);
399         }
400 }
401 //**********************************************************************************************************************
402 //divide shared or relabund file by groupings in the design file
403 //report all otu values to file
404 int IndicatorCommand::GetIndicatorSpecies(){
405         try {
406                 string thisOutputDir = outputDir;
407                 if (outputDir == "") {  thisOutputDir += m->hasPath(inputFileName);  }
408         map<string, string> variables; 
409         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName));
410                 string outputFileName = getOutputFileName("summary", variables);
411                 outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
412                 
413                 ofstream out;
414                 m->openOutputFile(outputFileName, out);
415                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
416                 m->mothurOutEndLine(); m->mothurOut("Species\tIndicator_Groups\tIndicatorValue\tpValue\n");
417                 
418                 int numBins = 0;
419                 if (sharedfile != "") { numBins = lookup[0]->getNumBins(); }
420                 else { numBins = lookupFloat[0]->getNumBins(); }
421                 
422                 if (m->control_pressed) { out.close(); return 0; }
423                         
424                 /*****************************************************/
425                 //create vectors containing rabund info                          //
426                 /*****************************************************/
427                         
428                 vector<float> indicatorValues; //size of numBins
429                 vector<float> pValues;
430         vector<string> indicatorGroups;
431                 map< vector<int>, vector<int> > randomGroupingsMap; //maps location in groupings to location in groupings, ie, [0][0] -> [1][2]. This is so we don't have to actually move the sharedRabundVectors.
432                         
433                 if (sharedfile != "") {
434                         vector< vector<SharedRAbundVector*> > groupings;
435                         set<string> groupsAlreadyAdded;
436                         vector<SharedRAbundVector*> subset;
437                         
438                         //for each grouping
439                         for (int i = 0; i < (designMap->getNamesOfGroups()).size(); i++) {
440                                 
441                                 for (int k = 0; k < lookup.size(); k++) {
442                                         //are you from this grouping?
443                                         if (designMap->getGroup(lookup[k]->getGroup()) == (designMap->getNamesOfGroups())[i]) {
444                                                 subset.push_back(lookup[k]);
445                                                 groupsAlreadyAdded.insert(lookup[k]->getGroup());
446                                         }
447                                 }
448                                 if (subset.size() != 0) { groupings.push_back(subset); }
449                                 subset.clear();
450                         }
451                                 
452                         if (groupsAlreadyAdded.size() != lookup.size()) {  m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); }
453                                 
454                         indicatorValues = getValues(groupings, indicatorGroups, randomGroupingsMap);
455                         
456                         pValues = getPValues(groupings, lookup.size(), indicatorValues);                                
457                 }else {
458                         vector< vector<SharedRAbundFloatVector*> > groupings;
459                         set<string> groupsAlreadyAdded;
460                         vector<SharedRAbundFloatVector*> subset;
461                         
462                         //for each grouping
463                         for (int i = 0; i < (designMap->getNamesOfGroups()).size(); i++) {
464                                 for (int k = 0; k < lookupFloat.size(); k++) {
465                                         //are you from this grouping?
466                                         if (designMap->getGroup(lookupFloat[k]->getGroup()) == (designMap->getNamesOfGroups())[i]) {
467                                                 subset.push_back(lookupFloat[k]);
468                                                 groupsAlreadyAdded.insert(lookupFloat[k]->getGroup());
469                                         }
470                                 }
471                                 if (subset.size() != 0) { groupings.push_back(subset); }
472                                 subset.clear();
473                         }
474                         
475                         if (groupsAlreadyAdded.size() != lookupFloat.size()) {  m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); }
476                         
477                         indicatorValues = getValues(groupings, indicatorGroups, randomGroupingsMap);
478                         
479                         pValues = getPValues(groupings, lookupFloat.size(), indicatorValues);
480                 }
481                         
482                 if (m->control_pressed) { out.close(); return 0; }
483                         
484                         
485                 /******************************************************/
486                 //output indicator values to table form               //
487                 /*****************************************************/
488                 out << "OTU\tIndicator_Groups\tIndicator_Value\tpValue" << endl;
489                 for (int j = 0; j < indicatorValues.size(); j++) {
490                                 
491                         if (m->control_pressed) { out.close(); return 0; }
492                         
493                         out << m->currentBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t'; 
494                         
495                         if (pValues[j] > (1/(float)iters)) { out << pValues[j] << endl; } 
496                         else { out << "<" << (1/(float)iters) << endl; }
497                         
498                         if (pValues[j] <= 0.05) {
499                                 cout << m->currentBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j]  << '\t';
500                                 string pValueString = "<" + toString((1/(float)iters)); 
501                                 if (pValues[j] > (1/(float)iters)) { pValueString = toString(pValues[j]); cout << pValues[j];} 
502                                 else { cout << "<" << (1/(float)iters); }
503                                 m->mothurOutJustToLog(m->currentBinLabels[j] + "\t" + indicatorGroups[j] + "\t" + toString(indicatorValues[j]) + "\t" + pValueString); 
504                                 m->mothurOutEndLine(); 
505                         }
506                 }
507                 
508                 out.close();
509                 
510                 return 0;
511         }
512         catch(exception& e) {
513                 m->errorOut(e, "IndicatorCommand", "GetIndicatorSpecies");      
514                 exit(1);
515         }
516 }
517 //**********************************************************************************************************************
518 //traverse tree finding indicator species values for each otu at each node
519 //label node with otu number that has highest indicator value
520 //report all otu values to file
521 int IndicatorCommand::GetIndicatorSpecies(Tree*& T){
522         try {
523                 
524                 string thisOutputDir = outputDir;
525                 if (outputDir == "") {  thisOutputDir += m->hasPath(inputFileName);  }
526         map<string, string> variables; 
527         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName));
528                 string outputFileName = getOutputFileName("summary",variables);
529                 outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
530                 
531                 ofstream out;
532                 m->openOutputFile(outputFileName, out);
533                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
534                 
535                 int numBins = 0;
536                 if (sharedfile != "") { numBins = lookup[0]->getNumBins(); }
537                 else { numBins = lookupFloat[0]->getNumBins(); }
538                 
539                 //print headings
540                 out << "TreeNode\t";
541                 for (int i = 0; i < numBins; i++) { out << m->currentBinLabels[i] << "_IndGroups" << '\t' << m->currentBinLabels[i] << "_IndValue" << '\t' << "pValue" << '\t'; }
542                 out << endl;
543                 
544                 m->mothurOutEndLine(); m->mothurOut("Node\tSpecies\tIndicator_Groups\tIndicatorValue\tpValue\n");
545                 
546                 string treeOutputDir = outputDir;
547                 if (outputDir == "") {  treeOutputDir += m->hasPath(treefile);  }
548         variables["[filename]"] = treeOutputDir + m->getRootName(m->getSimpleName(treefile));
549                 string outputTreeFileName = getOutputFileName("tree", variables);
550                 
551                 
552                 //create a map from tree node index to names of descendants, save time later to know which sharedRabund you need
553                 map<int, set<string> > nodeToDescendants;
554                 map<int, set<int> > descendantNodes;
555                 for (int i = 0; i < T->getNumNodes(); i++) {
556                         if (m->control_pressed) { return 0; }
557                         
558                         nodeToDescendants[i] = getDescendantList(T, i, nodeToDescendants, descendantNodes);
559                 }
560                 
561                 //you need the distances to leaf to decide grouping below
562                 //this will also set branch lengths if the tree does not include them
563                 map<int, float> distToRoot = getDistToRoot(T);
564                         
565                 //for each node
566                 for (int i = T->getNumLeaves(); i < T->getNumNodes(); i++) {
567                         //cout << endl << i+1 << endl;  
568                         if (m->control_pressed) { out.close(); return 0; }
569                         
570                         /*****************************************************/
571                         //create vectors containing rabund info                          //
572                         /*****************************************************/
573                         
574                         vector<float> indicatorValues; //size of numBins
575                         vector<float> pValues;
576             vector<string> indicatorGroups;
577                         map< vector<int>, vector<int> > randomGroupingsMap; //maps location in groupings to location in groupings, ie, [0][0] -> [1][2]. This is so we don't have to actually move the sharedRabundVectors.
578                         
579                         if (sharedfile != "") {
580                                 vector< vector<SharedRAbundVector*> > groupings;
581                                 
582                                 //get nodes that will be a valid grouping
583                                 //you are valid if you are not one of my descendants
584                                 //AND your distToRoot is >= mine
585                                 //AND you were not added as part of a larger grouping. Largest nodes are added first.
586                                 
587                                 set<string> groupsAlreadyAdded;
588                                 //create a grouping with my grouping
589                                 vector<SharedRAbundVector*> subset;
590                                 int count = 0;
591                                 int doneCount = nodeToDescendants[i].size();
592                                 for (int k = 0; k < lookup.size(); k++) {
593                                         //is this descendant of i
594                                         if ((nodeToDescendants[i].count(lookup[k]->getGroup()) != 0)) { 
595                                                 subset.push_back(lookup[k]);
596                                                 groupsAlreadyAdded.insert(lookup[k]->getGroup());
597                                                 count++;
598                                         }
599                                         if (count == doneCount) { break; } //quit once you get the rabunds for this grouping
600                                 }
601                                 if (subset.size() != 0) { groupings.push_back(subset); }
602                                 
603                                 
604                                 for (int j = (T->getNumNodes()-1); j >= 0; j--) {
605         
606                                         
607                                         if ((descendantNodes[i].count(j) == 0) && (distToRoot[j] >= distToRoot[i])) { 
608                                                 vector<SharedRAbundVector*> subset;
609                                                 int count = 0;
610                                                 int doneCount = nodeToDescendants[j].size();
611                                                 for (int k = 0; k < lookup.size(); k++) {
612                                                         //is this descendant of j, and we didn't already add this as part of a larger grouping
613                                                         if ((nodeToDescendants[j].count(lookup[k]->getGroup()) != 0) && (groupsAlreadyAdded.count(lookup[k]->getGroup()) == 0)) { 
614                                                                 subset.push_back(lookup[k]);
615                                                                 groupsAlreadyAdded.insert(lookup[k]->getGroup());
616                                                                 count++;
617                                                         }
618                                                         if (count == doneCount) { break; } //quit once you get the rabunds for this grouping
619                                                 }
620                                                 
621                                                 //if subset.size == 0 then the node was added as part of a larger grouping
622                                                 if (subset.size() != 0) { groupings.push_back(subset); }
623                                         }
624                                 }
625                                 
626                                 if (groupsAlreadyAdded.size() != lookup.size()) {  m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); }
627                                                                 
628                                 indicatorValues = getValues(groupings, indicatorGroups, randomGroupingsMap);
629                                 
630                                 pValues = getPValues(groupings, lookup.size(), indicatorValues);                                
631                         }else {
632                                 vector< vector<SharedRAbundFloatVector*> > groupings;
633                                 
634                                 //get nodes that will be a valid grouping
635                                 //you are valid if you are not one of my descendants
636                                 //AND your distToRoot is >= mine
637                                 //AND you were not added as part of a larger grouping. Largest nodes are added first.
638                                 
639                                 set<string> groupsAlreadyAdded;
640                                 //create a grouping with my grouping
641                                 vector<SharedRAbundFloatVector*> subset;
642                                 int count = 0;
643                                 int doneCount = nodeToDescendants[i].size();
644                                 for (int k = 0; k < lookupFloat.size(); k++) {
645                                         //is this descendant of i
646                                         if ((nodeToDescendants[i].count(lookupFloat[k]->getGroup()) != 0)) { 
647                                                 subset.push_back(lookupFloat[k]);
648                                                 groupsAlreadyAdded.insert(lookupFloat[k]->getGroup());
649                                                 count++;
650                                         }
651                                         if (count == doneCount) { break; } //quit once you get the rabunds for this grouping
652                                 }
653                                 if (subset.size() != 0) { groupings.push_back(subset); }
654                                 
655                                 for (int j = (T->getNumNodes()-1); j >= 0; j--) {
656                                         if ((descendantNodes[i].count(j) == 0) && (distToRoot[j] >= distToRoot[i])) {
657                                                 vector<SharedRAbundFloatVector*> subset;
658                                                 int count = 0;
659                                                 int doneCount = nodeToDescendants[j].size();
660                                                 for (int k = 0; k < lookupFloat.size(); k++) {
661                                                         //is this descendant of j, and we didn't already add this as part of a larger grouping
662                                                         if ((nodeToDescendants[j].count(lookupFloat[k]->getGroup()) != 0) && (groupsAlreadyAdded.count(lookupFloat[k]->getGroup()) == 0)) { 
663                                                                 subset.push_back(lookupFloat[k]);
664                                                                 groupsAlreadyAdded.insert(lookupFloat[k]->getGroup());
665                                                                 count++;
666                                                         }
667                                                         if (count == doneCount) { break; } //quit once you get the rabunds for this grouping
668                                                 }
669                                                 
670                                                 //if subset.size == 0 then the node was added as part of a larger grouping
671                                                 if (subset.size() != 0) { groupings.push_back(subset); }
672                                         }
673                                 }
674                                 
675                                 if (groupsAlreadyAdded.size() != lookupFloat.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); }
676                                 
677                                 indicatorValues = getValues(groupings, indicatorGroups, randomGroupingsMap);
678                                 
679                                 pValues = getPValues(groupings, lookupFloat.size(), indicatorValues);
680                         }
681                         
682                         if (m->control_pressed) { out.close(); return 0; }
683                         
684                         
685                         /******************************************************/
686                         //output indicator values to table form + label tree  //
687                         /*****************************************************/
688                         out << (i+1) << '\t';
689                         for (int j = 0; j < indicatorValues.size(); j++) {
690                                 
691                                 if (m->control_pressed) { out.close(); return 0; }
692                                 
693                                 if (pValues[j] < (1/(float)iters)) {
694                                         out << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t' << '<' << (1/(float)iters) << '\t';
695                                 }else {
696                                         out << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t' << pValues[j] << '\t';
697                                 }
698                                 
699                                 if (pValues[j] <= 0.05) {
700                                         cout << i+1 << '\t' << m->currentBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j]  << '\t';
701                                         string pValueString = "<" + toString((1/(float)iters)); 
702                                         if (pValues[j] > (1/(float)iters)) { pValueString = toString(pValues[j]); cout << pValues[j];} 
703                                         else { cout << "<" << (1/(float)iters); }
704                                         m->mothurOutJustToLog(toString(i) + "\t" + m->currentBinLabels[j] + "\t" + indicatorGroups[j] + "\t" + toString(indicatorValues[j]) + "\t" + pValueString); 
705                                         m->mothurOutEndLine(); 
706                                 }
707                         }
708                         out << endl;
709                         
710                         T->tree[i].setLabel((i+1));
711                 }
712                 out.close();
713         
714                 ofstream outTree;
715                 m->openOutputFile(outputTreeFileName, outTree);
716                 outputNames.push_back(outputTreeFileName); outputTypes["tree"].push_back(outputTreeFileName);
717                 
718                 T->print(outTree, "both");
719                 outTree.close();
720         
721                 return 0;
722         }
723         catch(exception& e) {
724                 m->errorOut(e, "IndicatorCommand", "GetIndicatorSpecies");      
725                 exit(1);
726         }
727 }
728 //**********************************************************************************************************************
729 vector<float> IndicatorCommand::getValues(vector< vector<SharedRAbundFloatVector*> >& groupings, vector<string>& indicatorGroupings, map< vector<int>, vector<int> > groupingsMap){
730         try {
731                 vector<float> values;
732                 map< vector<int>, vector<int> >::iterator it;
733         
734         indicatorGroupings.clear();
735         
736         //create grouping strings
737         vector<string> groupingsGroups;
738         for (int j = 0; j < groupings.size(); j++) {
739             string tempGrouping = "";
740             for (int k = 0; k < groupings[j].size()-1; k++) { 
741                 tempGrouping += groupings[j][k]->getGroup() + "-";
742             }
743             tempGrouping += groupings[j][groupings[j].size()-1]->getGroup();
744             groupingsGroups.push_back(tempGrouping);
745         }
746         
747         
748                 //for each otu
749                 for (int i = 0; i < groupings[0][0]->getNumBins(); i++) {
750                         
751                         if (m->control_pressed) { return values; }
752                         
753                         vector<float> terms; 
754                         float AijDenominator = 0.0;
755                         vector<float> Bij;
756                         
757                         //get overall abundance of each grouping
758                         for (int j = 0; j < groupings.size(); j++) {
759                                 
760                                 float totalAbund = 0;
761                                 int numNotZero = 0;
762                                 for (int k = 0; k < groupings[j].size(); k++) { 
763                                         vector<int> temp; temp.push_back(j); temp.push_back(k);
764                                         it = groupingsMap.find(temp);
765                                         
766                                         if (it == groupingsMap.end()) { //this one didnt get moved
767                                                 totalAbund += groupings[j][k]->getAbundance(i); 
768                                                 if (groupings[j][k]->getAbundance(i) != 0.0) { numNotZero++; }
769                                         }else {
770                                                 totalAbund += groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i); 
771                                                 if (groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i) != 0.0) { numNotZero++; }
772                                         }
773                                         
774                                 }
775                                 
776                                 //mean abundance
777                                 float Aij = (totalAbund / (float) groupings[j].size());
778                                 terms.push_back(Aij);
779                                 
780                                 //percentage of sites represented
781                                 Bij.push_back(numNotZero / (float) groupings[j].size());
782                                 
783                                 AijDenominator += Aij;
784                         }
785                         
786                         float maxIndVal = 0.0;
787             string maxGrouping = "";
788                         for (int j = 0; j < terms.size(); j++) { 
789                                 float thisAij = (terms[j] / AijDenominator); //relative abundance
790                                 float thisValue = thisAij * Bij[j] * 100.0;
791                                 
792                                 //save largest
793                                 if (thisValue > maxIndVal) { maxIndVal = thisValue;  maxGrouping = groupingsGroups[j]; }
794                         }
795                         
796                         values.push_back(maxIndVal);
797             indicatorGroupings.push_back(maxGrouping);
798                 }
799                 
800                 return values;
801         }
802         catch(exception& e) {
803                 m->errorOut(e, "IndicatorCommand", "getValues");        
804                 exit(1);
805         }
806 }
807 //**********************************************************************************************************************
808 //same as above, just data type difference
809 vector<float> IndicatorCommand::getValues(vector< vector<SharedRAbundVector*> >& groupings, vector<string>& indicatorGroupings, map< vector<int>, vector<int> > groupingsMap){
810         try {
811                 vector<float> values;
812                 map< vector<int>, vector<int> >::iterator it;
813         
814         indicatorGroupings.clear();
815         
816         //create grouping strings
817         vector<string> groupingsGroups;
818         for (int j = 0; j < groupings.size(); j++) {
819             string tempGrouping = "";
820             for (int k = 0; k < groupings[j].size()-1; k++) { 
821                 tempGrouping += groupings[j][k]->getGroup() + "-";
822             }
823             tempGrouping += groupings[j][groupings[j].size()-1]->getGroup();
824             groupingsGroups.push_back(tempGrouping);
825         }
826
827                 
828                 //for each otu
829                 for (int i = 0; i < groupings[0][0]->getNumBins(); i++) {
830                         vector<float> terms; 
831                         float AijDenominator = 0.0;
832                         vector<float> Bij;
833                         //get overall abundance of each grouping
834                         for (int j = 0; j < groupings.size(); j++) {
835         
836                                 int totalAbund = 0.0;
837                                 int numNotZero = 0;
838                                 for (int k = 0; k < groupings[j].size(); k++) { 
839                                         vector<int> temp; temp.push_back(j); temp.push_back(k);
840                                         it = groupingsMap.find(temp);
841                                         
842                                         if (it == groupingsMap.end()) { //this one didnt get moved
843                                                 totalAbund += groupings[j][k]->getAbundance(i); 
844                                                 if (groupings[j][k]->getAbundance(i) != 0.0) { numNotZero++; }
845                                         }else {
846                                                 totalAbund += groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i); 
847                                                 if (groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i) != 0.0) { numNotZero++; }
848                                         }
849                                         
850                                 }
851                                 
852                                 //mean abundance        
853                                 float Aij = (totalAbund / (float) groupings[j].size());
854                                 terms.push_back(Aij);
855                                 
856                                 //percentage of sites represented
857                                 Bij.push_back(numNotZero / (float) groupings[j].size());
858                                 
859                                 AijDenominator += Aij;
860                         }
861                         
862                         float maxIndVal = 0.0;
863             string maxGrouping = "";
864                         for (int j = 0; j < terms.size(); j++) { 
865                                 float thisAij = (terms[j] / AijDenominator); //relative abundance
866                                 float thisValue = thisAij * Bij[j] * 100.0;
867                                         
868                                 //save largest
869                                 if (thisValue > maxIndVal) { maxIndVal = thisValue;  maxGrouping = groupingsGroups[j]; }
870                         }
871                         
872                         values.push_back(maxIndVal);
873             indicatorGroupings.push_back(maxGrouping);
874                 }
875                 
876                 return values;
877         }
878         catch(exception& e) {
879                 m->errorOut(e, "IndicatorCommand", "getValues");        
880                 exit(1);
881         }
882 }
883 //**********************************************************************************************************************
884 //you need the distances to root to decide groupings
885 //this will also set branch lengths if the tree does not include them
886 map<int, float> IndicatorCommand::getDistToRoot(Tree*& T){
887         try {
888                 map<int, float> dists;
889                 
890                 bool hasBranchLengths = false;
891                 for (int i = 0; i < T->getNumNodes(); i++) { 
892                         if (T->tree[i].getBranchLength() > 0.0) {  hasBranchLengths = true; break; }
893                 }
894                 
895                 //set branchlengths if needed
896                 if (!hasBranchLengths) { 
897                         for (int i = 0; i < T->getNumNodes(); i++) {
898                                 
899                                 int lc = T->tree[i].getLChild();
900                                 int rc = T->tree[i].getRChild();
901                                 
902                                 if (lc == -1) { // you are a leaf
903                                         //if you are a leaf set you priliminary length to 1.0, this may adjust later
904                                         T->tree[i].setBranchLength(1.0);
905                                         dists[i] = 1.0;
906                                 }else{ // you are an internal node
907                                         //look at your children's length to leaf
908                                         float ldist = dists[lc];
909                                         float rdist = dists[rc];
910                                         
911                                         float greater = ldist;
912                                         if (rdist > greater) { greater = rdist; dists[i] = ldist + 1.0;}
913                                         else { dists[i] = rdist + 1.0; }
914                                         
915                                         
916                                         //branch length = difference + 1
917                                         T->tree[lc].setBranchLength((abs(ldist-greater) + 1.0));
918                                         T->tree[rc].setBranchLength((abs(rdist-greater) + 1.0));
919                                 }
920                         }
921                 }
922                         
923                 dists.clear();
924                 
925                 for (int i = 0; i < T->getNumNodes(); i++) {
926                         
927                         double sum = 0.0;
928                         int index = i;
929                         
930                         while(T->tree[index].getParent() != -1){
931                                 if (T->tree[index].getBranchLength() != -1) {
932                                         sum += abs(T->tree[index].getBranchLength()); 
933                                 }
934                                 index = T->tree[index].getParent();
935                         }
936                         
937                         dists[i] = sum;
938                 }
939                 
940                 return dists;
941         }
942         catch(exception& e) {
943                 m->errorOut(e, "IndicatorCommand", "getLengthToLeaf");  
944                 exit(1);
945         }
946 }
947 //**********************************************************************************************************************
948 set<string> IndicatorCommand::getDescendantList(Tree*& T, int i, map<int, set<string> > descendants, map<int, set<int> >& nodes){
949         try {
950                 set<string> names;
951                 
952                 set<string>::iterator it;
953                 
954                 int lc = T->tree[i].getLChild();
955                 int rc = T->tree[i].getRChild();
956                 
957                 if (lc == -1) { //you are a leaf your only descendant is yourself
958                         set<int> temp; temp.insert(i);
959                         nodes[i] = temp;
960                         
961                         if (designfile == "") {
962                                 names.insert(T->tree[i].getName());
963                         }else {
964                                 vector<string> myGroup; myGroup.push_back(T->tree[i].getName());
965                                 vector<string> myReps = designMap->getNamesSeqs(myGroup);
966                                 for (int k = 0; k < myReps.size(); k++) {
967                                         names.insert(myReps[k]);
968                                 }
969                         }
970                         
971                 }else{ //your descedants are the combination of your childrens descendants
972                         names = descendants[lc];
973                         nodes[i] = nodes[lc];
974                         for (it = descendants[rc].begin(); it != descendants[rc].end(); it++) {
975                                 names.insert(*it);
976                         }
977                         for (set<int>::iterator itNum = nodes[rc].begin(); itNum != nodes[rc].end(); itNum++) {
978                                 nodes[i].insert(*itNum);
979                         }
980                         //you are your own descendant
981                         nodes[i].insert(i);
982                 }
983                 
984                 return names;
985         }
986         catch(exception& e) {
987                 m->errorOut(e, "IndicatorCommand", "getDescendantList");        
988                 exit(1);
989         }
990 }
991 //**********************************************************************************************************************
992 int IndicatorCommand::getShared(){
993         try {
994                 InputData* input = new InputData(sharedfile, "sharedfile");
995                 lookup = input->getSharedRAbundVectors();
996                 string lastLabel = lookup[0]->getLabel();
997                 
998                 if (label == "") { label = lastLabel; delete input; return 0; }
999                 
1000                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
1001                 set<string> labels; labels.insert(label);
1002                 set<string> processedLabels;
1003                 set<string> userLabels = labels;
1004                 
1005                 //as long as you are not at the end of the file or done wih the lines you want
1006                 while((lookup[0] != NULL) && (userLabels.size() != 0)) {
1007                         if (m->control_pressed) {  delete input; return 0;  }
1008                         
1009                         if(labels.count(lookup[0]->getLabel()) == 1){
1010                                 processedLabels.insert(lookup[0]->getLabel());
1011                                 userLabels.erase(lookup[0]->getLabel());
1012                                 break;
1013                         }
1014                         
1015                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
1016                                 string saveLabel = lookup[0]->getLabel();
1017                                 
1018                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
1019                                 lookup = input->getSharedRAbundVectors(lastLabel);
1020                                 
1021                                 processedLabels.insert(lookup[0]->getLabel());
1022                                 userLabels.erase(lookup[0]->getLabel());
1023                                 
1024                                 //restore real lastlabel to save below
1025                                 lookup[0]->setLabel(saveLabel);
1026                                 break;
1027                         }
1028                         
1029                         lastLabel = lookup[0]->getLabel();                      
1030                         
1031                         //get next line to process
1032                         //prevent memory leak
1033                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
1034                         lookup = input->getSharedRAbundVectors();
1035                 }
1036                 
1037                 
1038                 if (m->control_pressed) { delete input; return 0;  }
1039                 
1040                 //output error messages about any remaining user labels
1041                 set<string>::iterator it;
1042                 bool needToRun = false;
1043                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
1044                         m->mothurOut("Your file does not include the label " + *it); 
1045                         if (processedLabels.count(lastLabel) != 1) {
1046                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
1047                                 needToRun = true;
1048                         }else {
1049                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
1050                         }
1051                 }
1052                 
1053                 //run last label if you need to
1054                 if (needToRun == true)  {
1055                         for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
1056                         lookup = input->getSharedRAbundVectors(lastLabel);
1057                 }       
1058                 
1059                 delete input;
1060                 return 0;
1061         }
1062         catch(exception& e) {
1063                 m->errorOut(e, "IndicatorCommand", "getShared");        
1064                 exit(1);
1065         }
1066 }
1067 //**********************************************************************************************************************
1068 int IndicatorCommand::getSharedFloat(){
1069         try {
1070                 InputData* input = new InputData(relabundfile, "relabund");
1071                 lookupFloat = input->getSharedRAbundFloatVectors();
1072                 string lastLabel = lookupFloat[0]->getLabel();
1073                 
1074                 if (label == "") { label = lastLabel; delete input; return 0; }
1075                 
1076                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
1077                 set<string> labels; labels.insert(label);
1078                 set<string> processedLabels;
1079                 set<string> userLabels = labels;
1080                 
1081                 //as long as you are not at the end of the file or done wih the lines you want
1082                 while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
1083                         
1084                         if (m->control_pressed) {  delete input; return 0;  }
1085                         
1086                         if(labels.count(lookupFloat[0]->getLabel()) == 1){
1087                                 processedLabels.insert(lookupFloat[0]->getLabel());
1088                                 userLabels.erase(lookupFloat[0]->getLabel());
1089                                 break;
1090                         }
1091                         
1092                         if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
1093                                 string saveLabel = lookupFloat[0]->getLabel();
1094                                 
1095                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } 
1096                                 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
1097                                 
1098                                 processedLabels.insert(lookupFloat[0]->getLabel());
1099                                 userLabels.erase(lookupFloat[0]->getLabel());
1100                                 
1101                                 //restore real lastlabel to save below
1102                                 lookupFloat[0]->setLabel(saveLabel);
1103                                 break;
1104                         }
1105                         
1106                         lastLabel = lookupFloat[0]->getLabel();                 
1107                         
1108                         //get next line to process
1109                         //prevent memory leak
1110                         for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } 
1111                         lookupFloat = input->getSharedRAbundFloatVectors();
1112                 }
1113                 
1114                 
1115                 if (m->control_pressed) { delete input; return 0;  }
1116                 
1117                 //output error messages about any remaining user labels
1118                 set<string>::iterator it;
1119                 bool needToRun = false;
1120                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
1121                         m->mothurOut("Your file does not include the label " + *it); 
1122                         if (processedLabels.count(lastLabel) != 1) {
1123                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
1124                                 needToRun = true;
1125                         }else {
1126                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
1127                         }
1128                 }
1129                 
1130                 //run last label if you need to
1131                 if (needToRun == true)  {
1132                         for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } } 
1133                         lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
1134                 }       
1135                 
1136                 delete input;
1137                 return 0;
1138         }
1139         catch(exception& e) {
1140                 m->errorOut(e, "IndicatorCommand", "getShared");        
1141         exit(1);
1142         }
1143 }
1144 //**********************************************************************************************************************
1145 vector<float> IndicatorCommand::driver(vector< vector<SharedRAbundFloatVector*> >& groupings, int num, vector<float> indicatorValues, int numIters){
1146         try {
1147                 vector<float> pvalues;
1148                 pvalues.resize(indicatorValues.size(), 0);
1149         vector<string> notUsedGroupings;  //we dont care about the grouping for the pvalues since they are randomized, but we need to pass the function something to make it work.
1150                 
1151                 for(int i=0;i<numIters;i++){
1152                         if (m->control_pressed) { break; }
1153                         map< vector<int>, vector<int> > groupingsMap = randomizeGroupings(groupings, num);
1154                         vector<float> randomIndicatorValues = getValues(groupings, notUsedGroupings, groupingsMap);
1155                         
1156                         for (int j = 0; j < indicatorValues.size(); j++) {
1157                                 if (randomIndicatorValues[j] >= indicatorValues[j]) { pvalues[j]++; }
1158                         }
1159                 }
1160                 
1161                 return pvalues;
1162                 
1163         }catch(exception& e) {
1164                 m->errorOut(e, "IndicatorCommand", "driver");   
1165                 exit(1);
1166         }
1167 }
1168 //**********************************************************************************************************************
1169 vector<float> IndicatorCommand::getPValues(vector< vector<SharedRAbundFloatVector*> >& groupings, int num, vector<float> indicatorValues){
1170         try {
1171                 vector<float> pvalues;
1172
1173                 if(processors == 1){
1174                         pvalues = driver(groupings, num, indicatorValues, iters);
1175             for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
1176                 }else{
1177             //divide iters between processors
1178                         vector<int> procIters;
1179             int numItersPerProcessor = iters / processors;
1180             
1181             //divide iters between processes
1182             for (int h = 0; h < processors; h++) {
1183                 if(h == processors - 1){ numItersPerProcessor = iters - h * numItersPerProcessor; }
1184                 procIters.push_back(numItersPerProcessor);
1185             }
1186             
1187             vector<int> processIDS;
1188             int process = 1;
1189                         
1190 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1191                                                 
1192                         //loop through and create all the processes you want
1193                         while (process != processors) {
1194                                 int pid = fork();
1195                                 
1196                                 if (pid > 0) {
1197                                         processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1198                                         process++;
1199                                 }else if (pid == 0){
1200                                         pvalues = driver(groupings, num, indicatorValues, procIters[process]);
1201                                         
1202                                         //pass pvalues to parent
1203                                         ofstream out;
1204                                         string tempFile = toString(getpid()) + ".pvalues.temp";
1205                                         m->openOutputFile(tempFile, out);
1206                                         
1207                                         //pass values
1208                                         for (int i = 0; i < pvalues.size(); i++) {
1209                                                 out << pvalues[i] << '\t';
1210                                         }
1211                                         out << endl;
1212                                         
1213                                         out.close();
1214                                         
1215                                         exit(0);
1216                                 }else { 
1217                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1218                                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1219                                         exit(0);
1220                                 }
1221                         }
1222                         
1223                         //do my part
1224                         pvalues = driver(groupings, num, indicatorValues, procIters[0]);
1225                         
1226                         //force parent to wait until all the processes are done
1227                         for (int i=0;i<processIDS.size();i++) { 
1228                                 int temp = processIDS[i];
1229                                 wait(&temp);
1230                         }
1231                         
1232                         //combine results                       
1233                         for (int i = 0; i < processIDS.size(); i++) {
1234                                 ifstream in;
1235                                 string tempFile =  toString(processIDS[i]) + ".pvalues.temp";
1236                                 m->openInputFile(tempFile, in);
1237                                 
1238                                 ////// to do ///////////
1239                                 int numTemp; numTemp = 0; 
1240                                 for (int j = 0; j < pvalues.size(); j++) {
1241                                         in >> numTemp; m->gobble(in);
1242                                         pvalues[j] += numTemp;
1243                                 }
1244                                 in.close(); m->mothurRemove(tempFile);
1245                         }
1246                         for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
1247 #else
1248                        
1249             //fill in functions
1250             vector<indicatorData*> pDataArray;
1251             DWORD   dwThreadIdArray[processors-1];
1252             HANDLE  hThreadArray[processors-1];
1253             
1254             //Create processor worker threads.
1255             for( int i=1; i<processors; i++ ){
1256                 
1257                 //make copy of lookup so we don't get access violations
1258                 vector< vector<SharedRAbundFloatVector*> > newGroupings;
1259
1260                 for (int k = 0; k < groupings.size(); k++) {
1261                     vector<SharedRAbundFloatVector*> newLookup;
1262                     for (int l = 0; l < groupings[k].size(); l++) {
1263                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
1264                         temp->setLabel(groupings[k][l]->getLabel());
1265                         temp->setGroup(groupings[k][l]->getGroup());
1266                         newLookup.push_back(temp);
1267                     }
1268                     newGroupings.push_back(newLookup);
1269                 }
1270                 
1271                 //for each bin
1272                 for (int l = 0; l < groupings.size(); l++) {
1273                     for (int k = 0; k < groupings[l][0]->getNumBins(); k++) {
1274                         if (m->control_pressed) { for (int j = 0; j < newGroupings.size(); j++) { for (int u = 0; u < newGroupings[j].size(); u++) { delete newGroupings[j][u];  } } return pvalues; }
1275                         
1276                         for (int j = 0; j < groupings[l].size(); j++) { newGroupings[l][j]->push_back(groupings[l][j]->getAbundance(k), groupings[l][j]->getGroup()); }
1277                     }
1278                 }
1279         
1280                 vector<float> copyIValues = indicatorValues;
1281
1282                 indicatorData* temp = new indicatorData(m, procIters[i], newGroupings, num, copyIValues);
1283                 pDataArray.push_back(temp);
1284                 processIDS.push_back(i);
1285                 
1286                 hThreadArray[i-1] = CreateThread(NULL, 0, MyIndicatorThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1287             }
1288             
1289             //do my part
1290                         pvalues = driver(groupings, num, indicatorValues, procIters[0]);
1291            
1292             //Wait until all threads have terminated.
1293             WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1294             
1295             //Close all thread handles and free memory allocations.
1296             for(int i=0; i < pDataArray.size(); i++){
1297                 for (int j = 0; j < pDataArray[i]->pvalues.size(); j++) { pvalues[j] += pDataArray[i]->pvalues[j];  }
1298                 
1299                 for (int l = 0; l < pDataArray[i]->groupings.size(); l++) {
1300                     for (int j = 0; j < pDataArray[i]->groupings[l].size(); j++) { delete pDataArray[i]->groupings[l][j]; }
1301                 }
1302                 
1303                 CloseHandle(hThreadArray[i]);
1304                 delete pDataArray[i];
1305             }
1306             
1307             for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
1308 #endif
1309                 }
1310
1311                 
1312                 return pvalues;
1313         }
1314         catch(exception& e) {
1315                 m->errorOut(e, "IndicatorCommand", "getPValues");       
1316                 exit(1);
1317         }
1318 }
1319
1320 //**********************************************************************************************************************
1321 //same as above, just data type difference
1322 vector<float> IndicatorCommand::driver(vector< vector<SharedRAbundVector*> >& groupings, int num, vector<float> indicatorValues, int numIters){
1323         try {
1324                 vector<float> pvalues;
1325                 pvalues.resize(indicatorValues.size(), 0);
1326         vector<string> notUsedGroupings;  //we dont care about the grouping for the pvalues since they are randomized, but we need to pass the function something to make it work.
1327                 
1328                 for(int i=0;i<numIters;i++){
1329                         if (m->control_pressed) { break; }
1330                         map< vector<int>, vector<int> > groupingsMap = randomizeGroupings(groupings, num);
1331                         vector<float> randomIndicatorValues = getValues(groupings, notUsedGroupings, groupingsMap);
1332                         
1333                         for (int j = 0; j < indicatorValues.size(); j++) {
1334                                 if (randomIndicatorValues[j] >= indicatorValues[j]) { pvalues[j]++; }
1335                         }
1336                 }
1337                 
1338                 return pvalues;
1339                 
1340         }catch(exception& e) {
1341                 m->errorOut(e, "IndicatorCommand", "driver");   
1342                 exit(1);
1343         }
1344 }
1345 //**********************************************************************************************************************
1346 //same as above, just data type difference
1347 vector<float> IndicatorCommand::getPValues(vector< vector<SharedRAbundVector*> >& groupings, int num, vector<float> indicatorValues){
1348         try {
1349                 vector<float> pvalues;
1350         
1351                 if(processors == 1){
1352                         pvalues = driver(groupings, num, indicatorValues, iters);
1353             for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
1354                 }else{
1355             //divide iters between processors
1356                         vector<int> procIters;
1357             int numItersPerProcessor = iters / processors;
1358             
1359             //divide iters between processes
1360             for (int h = 0; h < processors; h++) {
1361                 if(h == processors - 1){ numItersPerProcessor = iters - h * numItersPerProcessor; }
1362                 procIters.push_back(numItersPerProcessor);
1363             }
1364             
1365             vector<int> processIDS;
1366             int process = 1;
1367                         
1368 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1369             
1370                         //loop through and create all the processes you want
1371                         while (process != processors) {
1372                                 int pid = fork();
1373                                 
1374                                 if (pid > 0) {
1375                                         processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1376                                         process++;
1377                                 }else if (pid == 0){
1378                                         pvalues = driver(groupings, num, indicatorValues, procIters[process]);
1379                                         
1380                                         //pass pvalues to parent
1381                                         ofstream out;
1382                                         string tempFile = toString(getpid()) + ".pvalues.temp";
1383                                         m->openOutputFile(tempFile, out);
1384                                         
1385                                         //pass values
1386                                         for (int i = 0; i < pvalues.size(); i++) {
1387                                                 out << pvalues[i] << '\t';
1388                                         }
1389                                         out << endl;
1390                                         
1391                                         out.close();
1392                                         
1393                                         exit(0);
1394                                 }else {
1395                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1396                                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1397                                         exit(0);
1398                                 }
1399                         }
1400                         
1401                         //do my part
1402                         pvalues = driver(groupings, num, indicatorValues, procIters[0]);
1403                         
1404                         //force parent to wait until all the processes are done
1405                         for (int i=0;i<processIDS.size();i++) {
1406                                 int temp = processIDS[i];
1407                                 wait(&temp);
1408                         }
1409                         
1410                         //combine results
1411                         for (int i = 0; i < processIDS.size(); i++) {
1412                                 ifstream in;
1413                                 string tempFile =  toString(processIDS[i]) + ".pvalues.temp";
1414                                 m->openInputFile(tempFile, in);
1415                                 
1416                                 ////// to do ///////////
1417                                 int numTemp; numTemp = 0;
1418                                 for (int j = 0; j < pvalues.size(); j++) {
1419                                         in >> numTemp; m->gobble(in);
1420                                         pvalues[j] += numTemp;
1421                                 }
1422                                 in.close(); m->mothurRemove(tempFile);
1423                         }
1424                         for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
1425 #else
1426             
1427             //fill in functions
1428             vector<indicatorData*> pDataArray;
1429             DWORD   dwThreadIdArray[processors-1];
1430             HANDLE  hThreadArray[processors-1];
1431             
1432             //Create processor worker threads.
1433             for( int i=1; i<processors; i++ ){
1434                 
1435                 //make copy of lookup so we don't get access violations
1436                 vector< vector<SharedRAbundFloatVector*> > newGroupings;
1437                 
1438                 for (int k = 0; k < groupings.size(); k++) {
1439                     vector<SharedRAbundFloatVector*> newLookup;
1440                     for (int l = 0; l < groupings[k].size(); l++) {
1441                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
1442                         temp->setLabel(groupings[k][l]->getLabel());
1443                         temp->setGroup(groupings[k][l]->getGroup());
1444                         newLookup.push_back(temp);
1445                     }
1446                     newGroupings.push_back(newLookup);
1447                 }
1448                 
1449                 //for each bin
1450                 for (int l = 0; l < groupings.size(); l++) {
1451                     for (int k = 0; k < groupings[l][0]->getNumBins(); k++) {
1452                         if (m->control_pressed) { for (int j = 0; j < newGroupings.size(); j++) { for (int u = 0; u < newGroupings[j].size(); u++) { delete newGroupings[j][u];  } } return pvalues; }
1453                         
1454                         for (int j = 0; j < groupings[l].size(); j++) { newGroupings[l][j]->push_back((float)(groupings[l][j]->getAbundance(k)), groupings[l][j]->getGroup()); }
1455                     }
1456                 }
1457                 
1458                 vector<float> copyIValues = indicatorValues;
1459                 
1460                 indicatorData* temp = new indicatorData(m, procIters[i], newGroupings, num, copyIValues);
1461                 pDataArray.push_back(temp);
1462                 processIDS.push_back(i);
1463                 
1464                 hThreadArray[i-1] = CreateThread(NULL, 0, MyIndicatorThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1465             }
1466             
1467             //do my part
1468                         pvalues = driver(groupings, num, indicatorValues, procIters[0]);
1469             
1470             //Wait until all threads have terminated.
1471             WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1472             
1473             //Close all thread handles and free memory allocations.
1474             for(int i=0; i < pDataArray.size(); i++){
1475                 for (int j = 0; j < pDataArray[i]->pvalues.size(); j++) { pvalues[j] += pDataArray[i]->pvalues[j];  }
1476                 
1477                 for (int l = 0; l < pDataArray[i]->groupings.size(); l++) {
1478                     for (int j = 0; j < pDataArray[i]->groupings[l].size(); j++) { delete pDataArray[i]->groupings[l][j]; }
1479                 }
1480                 
1481                 CloseHandle(hThreadArray[i]);
1482                 delete pDataArray[i];
1483             }
1484             
1485             for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
1486 #endif
1487                 }
1488         
1489                 
1490                 return pvalues;
1491         }
1492         catch(exception& e) {
1493                 m->errorOut(e, "IndicatorCommand", "getPValues");
1494                 exit(1);
1495         }
1496 }
1497 //**********************************************************************************************************************
1498 //swap groups between groupings, in essence randomizing the second column of the design file
1499 map< vector<int>, vector<int> > IndicatorCommand::randomizeGroupings(vector< vector<SharedRAbundVector*> >& groupings, int numLookupGroups){
1500         try {
1501                 
1502                 map< vector<int>, vector<int> > randomGroupings; 
1503                 
1504                 for (int i = 0; i < numLookupGroups; i++) {
1505                         if (m->control_pressed) {break;}
1506                         
1507                         //get random groups to swap to switch with
1508                         //generate random int between 0 and groupings.size()-1
1509                         int z = m->getRandomIndex(groupings.size()-1);
1510                         int x = m->getRandomIndex(groupings.size()-1);
1511                         int a = m->getRandomIndex(groupings[z].size()-1);
1512                         int b = m->getRandomIndex(groupings[x].size()-1);
1513                         //cout << i << '\t' << z << '\t' << x << '\t' << a << '\t' << b << endl;        
1514                         //if ((z < 0) || (z > 1) || x<0 || x>1 || a <0 || a>groupings[z].size()-1 || b<0 || b>groupings[x].size()-1) { cout << "probelm" << i << '\t' << z << '\t' << x << '\t' << a << '\t' << b << endl;      }
1515                         
1516                         vector<int> from;
1517                         vector<int> to;
1518                         
1519                         from.push_back(z); from.push_back(a);
1520                         to.push_back(x); to.push_back(b);
1521                         
1522                         randomGroupings[from] = to;
1523                 }
1524                 //cout << "done" << endl;       
1525                 return randomGroupings;
1526         }
1527         catch(exception& e) {
1528                 m->errorOut(e, "IndicatorCommand", "randomizeGroupings");       
1529                 exit(1);
1530         }
1531 }       
1532 //**********************************************************************************************************************
1533 //swap groups between groupings, in essence randomizing the second column of the design file
1534 map< vector<int>, vector<int> > IndicatorCommand::randomizeGroupings(vector< vector<SharedRAbundFloatVector*> >& groupings, int numLookupGroups){
1535         try {
1536                 
1537                 map< vector<int>, vector<int> > randomGroupings; 
1538                 
1539                 for (int i = 0; i < numLookupGroups; i++) {
1540                         
1541                         //get random groups to swap to switch with
1542                         //generate random int between 0 and groupings.size()-1
1543                         int z = m->getRandomIndex(groupings.size()-1);
1544                         int x = m->getRandomIndex(groupings.size()-1);
1545                         int a = m->getRandomIndex(groupings[z].size()-1);
1546                         int b = m->getRandomIndex(groupings[x].size()-1);
1547                         //cout << i << '\t' << z << '\t' << x << '\t' << a << '\t' << b << endl;                
1548                         
1549                         vector<int> from;
1550                         vector<int> to;
1551                         
1552                         from.push_back(z); from.push_back(a);
1553                         to.push_back(x); to.push_back(b);
1554                         
1555                         randomGroupings[from] = to;
1556                 }
1557                 
1558                 return randomGroupings;
1559         }
1560         catch(exception& e) {
1561                 m->errorOut(e, "IndicatorCommand", "randomizeGroupings");       
1562                 exit(1);
1563         }
1564 }                       
1565                                                                 
1566 /*****************************************************************/
1567
1568