]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
major change to the tree class to use the count table class instead of tree map....
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11 #include "consensus.h"
12 #include "subsample.h"
13 #include "treereader.h"
14
15 //**********************************************************************************************************************
16 vector<string> UnifracWeightedCommand::setParameters(){ 
17         try {
18                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
20         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
22                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
23                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
26         CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
27         CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
28                 CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(pdistance);
29                 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
30                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
31                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
32                 
33                 vector<string> myArray;
34                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
35                 return myArray;
36         }
37         catch(exception& e) {
38                 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
39                 exit(1);
40         }
41 }
42 //**********************************************************************************************************************
43 string UnifracWeightedCommand::getHelpString(){ 
44         try {
45                 string helpString = "";
46                 helpString += "The unifrac.weighted command parameters are tree, group, name, count, groups, iters, distance, processors, root, subsample, consensus and random.  tree parameter is required unless you have valid current tree file.\n";
47                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
48                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
49                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
50                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
51                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
52                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
53         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
54         helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results, as well as a consensus tree built from these trees. Default=F.\n";
55         helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
56                 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
57                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
58                 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
59                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
60                 return helpString;
61         }
62         catch(exception& e) {
63                 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
64                 exit(1);
65         }
66 }
67 //**********************************************************************************************************************
68 string UnifracWeightedCommand::getOutputFileNameTag(string type, string inputName=""){  
69         try {
70         string outputFileName = "";
71                 map<string, vector<string> >::iterator it;
72         
73         //is this a type this command creates
74         it = outputTypes.find(type);
75         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
76         else {
77             if (type == "weighted")            {   outputFileName =  "weighted";   }
78             else if (type == "wsummary")        {   outputFileName =  "wsummary";   }
79             else if (type == "phylip")           {   outputFileName =  "dist";   }
80             else if (type == "column")           {   outputFileName =  "dist";   }
81             else if (type == "tree")             {   outputFileName =  "tre";   }
82             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
83         }
84         return outputFileName;
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "UnifracWeightedCommand", "getOutputFileNameTag");
88                 exit(1);
89         }
90 }
91 //**********************************************************************************************************************
92 UnifracWeightedCommand::UnifracWeightedCommand(){       
93         try {
94                 abort = true; calledHelp = true; 
95                 setParameters();
96                 vector<string> tempOutNames;
97                 outputTypes["weighted"] = tempOutNames;
98                 outputTypes["wsummary"] = tempOutNames;
99                 outputTypes["phylip"] = tempOutNames;
100                 outputTypes["column"] = tempOutNames;
101         outputTypes["tree"] = tempOutNames;
102         }
103         catch(exception& e) {
104                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
105                 exit(1);
106         }
107 }
108
109 /***********************************************************/
110 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
111         try {
112                 abort = false; calledHelp = false;   
113                         
114                 //allow user to run help
115                 if(option == "help") { help(); abort = true; calledHelp = true; }
116                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
117                 
118                 else {
119                         vector<string> myArray = setParameters();
120                         
121                         OptionParser parser(option);
122                         map<string,string> parameters=parser.getParameters();
123                         map<string,string>::iterator it;
124                         
125                         ValidParameters validParameter;
126                 
127                         //check to make sure all parameters are valid for command
128                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
129                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
130                         }
131                         
132                         //initialize outputTypes
133                         vector<string> tempOutNames;
134                         outputTypes["weighted"] = tempOutNames;
135                         outputTypes["wsummary"] = tempOutNames;
136                         outputTypes["phylip"] = tempOutNames;
137                         outputTypes["column"] = tempOutNames;
138             outputTypes["tree"] = tempOutNames;
139                         
140                         //if the user changes the input directory command factory will send this info to us in the output parameter 
141                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
142                         if (inputDir == "not found"){   inputDir = "";          }
143                         else {
144                                 string path;
145                                 it = parameters.find("tree");
146                                 //user has given a template file
147                                 if(it != parameters.end()){ 
148                                         path = m->hasPath(it->second);
149                                         //if the user has not given a path then, add inputdir. else leave path alone.
150                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
151                                 }
152                                 
153                                 it = parameters.find("group");
154                                 //user has given a template file
155                                 if(it != parameters.end()){ 
156                                         path = m->hasPath(it->second);
157                                         //if the user has not given a path then, add inputdir. else leave path alone.
158                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
159                                 }
160                                 
161                                 it = parameters.find("name");
162                                 //user has given a template file
163                                 if(it != parameters.end()){ 
164                                         path = m->hasPath(it->second);
165                                         //if the user has not given a path then, add inputdir. else leave path alone.
166                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
167                                 }
168                 
169                 it = parameters.find("count");
170                                 //user has given a template file
171                                 if(it != parameters.end()){ 
172                                         path = m->hasPath(it->second);
173                                         //if the user has not given a path then, add inputdir. else leave path alone.
174                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
175                                 }
176                         }
177                         
178                         //check for required parameters
179                         treefile = validParameter.validFile(parameters, "tree", true);
180                         if (treefile == "not open") { treefile = ""; abort = true; }
181                         else if (treefile == "not found") {                             //if there is a current design file, use it
182                                 treefile = m->getTreeFile(); 
183                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
184                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
185                         }else { m->setTreeFile(treefile); }     
186                         
187                         //check for required parameters
188                         groupfile = validParameter.validFile(parameters, "group", true);
189                         if (groupfile == "not open") { abort = true; }
190                         else if (groupfile == "not found") { groupfile = ""; }
191                         else { m->setGroupFile(groupfile); }
192                         
193                         namefile = validParameter.validFile(parameters, "name", true);
194                         if (namefile == "not open") { namefile = ""; abort = true; }
195                         else if (namefile == "not found") { namefile = ""; }
196                         else { m->setNameFile(namefile); }
197                         
198             countfile = validParameter.validFile(parameters, "count", true);
199                         if (countfile == "not open") { countfile = ""; abort = true; }
200                         else if (countfile == "not found") { countfile = "";  } 
201                         else { m->setCountTableFile(countfile); }
202             
203             if ((namefile != "") && (countfile != "")) {
204                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
205             }
206                         
207             if ((groupfile != "") && (countfile != "")) {
208                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
209             }
210
211                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
212                         
213                                                                                                                                         
214                         //check for optional parameter and set defaults
215                         // ...at some point should added some additional type checking...
216                         groups = validParameter.validFile(parameters, "groups", false);                 
217                         if (groups == "not found") { groups = ""; }
218                         else { 
219                                 m->splitAtDash(groups, Groups);
220                                 m->setGroups(Groups);
221                         }
222                                 
223                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
224                         m->mothurConvert(itersString, iters); 
225                         
226                         string temp = validParameter.validFile(parameters, "distance", false);                  
227                         if (temp == "not found") { phylip = false; outputForm = ""; }
228                         else{
229                 if (temp=="phylip") { temp = "lt"; }
230                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
231                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
232                         }
233                         
234                         temp = validParameter.validFile(parameters, "random", false);                           if (temp == "not found") { temp = "F"; }
235                         random = m->isTrue(temp);
236                         
237                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
238                         includeRoot = m->isTrue(temp);
239                         
240                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
241                         m->setProcessors(temp);
242                         m->mothurConvert(temp, processors);
243             
244             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
245                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
246             else {  
247                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
248                 else { subsample = false; }
249             }
250                         
251             if (!subsample) { subsampleIters = 0;   }
252             else { subsampleIters = iters;          }
253             
254             temp = validParameter.validFile(parameters, "consensus", false);                                    if (temp == "not found") { temp = "F"; }
255                         consensus = m->isTrue(temp);
256             
257                         if (subsample && random) {  m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true;  } 
258                         if (countfile == "") { if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } }
259             else {  
260                 CountTable testCt; 
261                 if ((!testCt.testGroups(countfile)) && (subsample)) {
262                     m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;  
263                 }
264             }
265             if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
266             if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
267             
268                         if (countfile=="") {
269                 if (namefile == "") {
270                     vector<string> files; files.push_back(treefile);
271                     parser.getNameFile(files);
272                 } 
273             }
274                 }
275                 
276                 
277         }
278         catch(exception& e) {
279                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
280                 exit(1);
281         }
282 }
283 /***********************************************************/
284 int UnifracWeightedCommand::execute() {
285         try {
286         
287                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
288                 
289                 m->setTreeFile(treefile);
290                 
291         TreeReader* reader;
292         if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
293         else { reader = new TreeReader(treefile, countfile); }
294         T = reader->getTrees();
295         ct = T[0]->getCountTable();
296         delete reader;
297         
298         if (m->control_pressed) {  delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
299                                 
300                 sumFile = outputDir + m->getSimpleName(treefile) + getOutputFileNameTag("wsummary");
301                 m->openOutputFile(sumFile, outSum);
302                 outputNames.push_back(sumFile);  outputTypes["wsummary"].push_back(sumFile);
303                 
304         SharedUtil util;
305                 string s; //to make work with setgroups
306                 Groups = m->getGroups();
307                 vector<string> nameGroups = ct->getNamesOfGroups();
308                 util.setGroups(Groups, nameGroups, s, numGroups, "weighted");   //sets the groups the user wants to analyze
309                 m->setGroups(Groups);
310                 
311         if (m->control_pressed) {  delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
312         
313                 Weighted weighted(includeRoot);
314                         
315                 int start = time(NULL);
316             
317         //set or check size
318         if (subsample) {
319             //user has not set size, set size = smallest samples size
320             if (subsampleSize == -1) { 
321                 vector<string> temp; temp.push_back(Groups[0]);
322                 subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
323                 for (int i = 1; i < Groups.size(); i++) {
324                     int thisSize = ct->getGroupCount(Groups[i]);
325                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
326                 }
327                 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
328             }else { //eliminate any too small groups
329                 vector<string> newGroups = Groups;
330                 Groups.clear();
331                 for (int i = 0; i < newGroups.size(); i++) {
332                     int thisSize = ct->getGroupCount(newGroups[i]);
333                     
334                     if (thisSize >= subsampleSize) {    Groups.push_back(newGroups[i]); }
335                     else {   m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
336                 } 
337                 m->setGroups(Groups);
338             }
339         }
340         
341         //here in case some groups are removed by subsample
342         util.getCombos(groupComb, Groups, numComp);
343         
344         if (numComp < processors) { processors = numComp; }
345         
346         if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
347         
348         //get weighted scores for users trees
349         for (int i = 0; i < T.size(); i++) {
350             
351             if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]);  } return 0; }
352             
353             counter = 0;
354             rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
355             uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
356             
357             vector<double> userData; userData.resize(numComp,0);  //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
358             vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
359             
360             if (random) {  
361                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("weighted"), itersString);  
362                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("weighted"));
363                 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("weighted"));
364             } 
365             
366             userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
367             if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0; }
368             
369             //save users score
370             for (int s=0; s<numComp; s++) {
371                 //add users score to vector of user scores
372                 uScores[s].push_back(userData[s]);
373                 //save users tree score for summary file
374                 utreeScores.push_back(userData[s]);
375             }
376             
377             if (random) {  runRandomCalcs(T[i], userData); }
378             
379             //clear data
380             rScores.clear();
381             uScores.clear();
382             validScores.clear();
383             
384             //subsample loop
385             vector< vector<double> > calcDistsTotals;  //each iter, each groupCombos dists. this will be used to make .dist files
386             for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
387                 
388                 if (m->control_pressed) { break; }
389                 
390                 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
391                 CountTable* newCt = new CountTable();
392                 
393                 //uses method of setting groups to doNotIncludeMe
394                 SubSample sample;
395                 Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
396                
397                 //call new weighted function
398                 vector<double> iterData; iterData.resize(numComp,0);
399                 Weighted thisWeighted(includeRoot);
400                 iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
401                 
402                 //save data to make ave dist, std dist
403                 calcDistsTotals.push_back(iterData);
404                 
405                 delete newCt;
406                 delete subSampleTree;
407                 
408                 if((thisIter+1) % 100 == 0){    m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine();              }
409             }
410             
411             if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0; }
412             
413             if (subsample) {  getAverageSTDMatrices(calcDistsTotals, i); }
414             if (consensus) {  getConsensusTrees(calcDistsTotals, i);  }
415         }
416         
417                 
418                 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);  } return 0;  }
419                 
420         if (phylip) {   createPhylipFile();             }
421     
422                 printWSummaryFile();
423                 
424                 //clear out users groups
425                 m->clearGroups();
426                 delete ct; 
427                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
428                 
429                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
430                 
431                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
432                 
433                 //set phylip file as new current phylipfile
434                 string current = "";
435                 itTypes = outputTypes.find("phylip");
436                 if (itTypes != outputTypes.end()) {
437                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
438                 }
439                 
440                 //set column file as new current columnfile
441                 itTypes = outputTypes.find("column");
442                 if (itTypes != outputTypes.end()) {
443                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
444                 }
445                 
446                 m->mothurOutEndLine();
447                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
448                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
449                 m->mothurOutEndLine();
450                 
451                 return 0;
452                 
453         }
454         catch(exception& e) {
455                 m->errorOut(e, "UnifracWeightedCommand", "execute");
456                 exit(1);
457         }
458 }
459 /**************************************************************************************************/
460 int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
461         try {
462         //we need to find the average distance and standard deviation for each groups distance
463         
464         //finds sum
465         vector<double> averages; averages.resize(numComp, 0); 
466         for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
467             for (int i = 0; i < dists[thisIter].size(); i++) {  
468                 averages[i] += dists[thisIter][i];
469             }
470         }
471         
472         //finds average.
473         for (int i = 0; i < averages.size(); i++) {  averages[i] /= (float) subsampleIters; }
474         
475         //find standard deviation
476         vector<double> stdDev; stdDev.resize(numComp, 0);
477                 
478         for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
479             for (int j = 0; j < dists[thisIter].size(); j++) {
480                 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
481             }
482         }
483         for (int i = 0; i < stdDev.size(); i++) {  
484             stdDev[i] /= (float) subsampleIters; 
485             stdDev[i] = sqrt(stdDev[i]);
486         }
487         
488         //make matrix with scores in it
489         vector< vector<double> > avedists;      avedists.resize(m->getNumGroups());
490         for (int i = 0; i < m->getNumGroups(); i++) {
491             avedists[i].resize(m->getNumGroups(), 0.0);
492         }
493         
494         //make matrix with scores in it
495         vector< vector<double> > stddists;      stddists.resize(m->getNumGroups());
496         for (int i = 0; i < m->getNumGroups(); i++) {
497             stddists[i].resize(m->getNumGroups(), 0.0);
498         }
499         
500         //flip it so you can print it
501         int count = 0;
502         for (int r=0; r<m->getNumGroups(); r++) { 
503             for (int l = 0; l < r; l++) {
504                 avedists[r][l] = averages[count];
505                 avedists[l][r] = averages[count];
506                 stddists[r][l] = stdDev[count];
507                 stddists[l][r] = stdDev[count];
508                 count++;
509             }
510         }
511         
512         string aveFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".weighted.ave." + getOutputFileNameTag("phylip");
513         if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);  }
514         else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName);  }
515         ofstream out;
516         m->openOutputFile(aveFileName, out);
517         
518         string stdFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".weighted.std." + getOutputFileNameTag("phylip");
519         if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
520         else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }        
521         ofstream outStd;
522         m->openOutputFile(stdFileName, outStd);
523         
524         if ((outputForm == "lt") || (outputForm == "square")) {
525             //output numSeqs
526             out << m->getNumGroups() << endl;
527             outStd << m->getNumGroups() << endl;
528         }
529         
530         //output to file
531         for (int r=0; r<m->getNumGroups(); r++) { 
532             //output name
533             string name = (m->getGroups())[r];
534             if (name.length() < 10) { //pad with spaces to make compatible
535                 while (name.length() < 10) {  name += " ";  }
536             }
537             
538             if (outputForm == "lt") {
539                 out << name << '\t';
540                 outStd << name << '\t';
541                 
542                 //output distances
543                 for (int l = 0; l < r; l++) {   out  << avedists[r][l] << '\t';  outStd  << stddists[r][l] << '\t';}
544                 out << endl;  outStd << endl;
545             }else if (outputForm == "square") {
546                 out << name << '\t';
547                 outStd << name << '\t';
548                 
549                 //output distances
550                 for (int l = 0; l < m->getNumGroups(); l++) {   out  << avedists[r][l] << '\t'; outStd  << stddists[r][l] << '\t'; }
551                 out << endl; outStd << endl;
552             }else{
553                 //output distances
554                 for (int l = 0; l < r; l++) {   
555                     string otherName = (m->getGroups())[l];
556                     if (otherName.length() < 10) { //pad with spaces to make compatible
557                         while (otherName.length() < 10) {  otherName += " ";  }
558                     }
559                     
560                     out  << name << '\t' << otherName << avedists[r][l] << endl;  
561                     outStd  << name << '\t' << otherName << stddists[r][l] << endl; 
562                 }
563             }
564         }
565         out.close();
566         outStd.close();
567         
568         return 0;
569     }
570         catch(exception& e) {
571                 m->errorOut(e, "UnifracWeightedCommand", "getAverageSTDMatrices");
572                 exit(1);
573         }
574 }
575
576 /**************************************************************************************************/
577 int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
578         try {
579         
580         //used in tree constructor 
581         m->runParse = false;
582         
583         ///create treemap class from groupmap for tree class to use
584         CountTable newCt;
585         set<string> nameMap;
586         map<string, string> groupMap;
587         set<string> gps;
588         for (int i = 0; i < m->getGroups().size(); i++) { 
589             nameMap.insert(m->getGroups()[i]); 
590             gps.insert(m->getGroups()[i]); 
591             groupMap[m->getGroups()[i]] = m->getGroups()[i];
592         }
593         newCt.createTable(nameMap, groupMap, gps);
594         
595         //clear  old tree names if any
596         m->Treenames.clear();
597         
598         //fills globaldatas tree names
599         m->Treenames = m->getGroups();
600         
601         vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
602         
603         if (m->control_pressed) { return 0; }
604         
605         Consensus con;
606         Tree* conTree = con.getTree(newTrees);
607         
608         //create a new filename
609         string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons." + getOutputFileNameTag("tree");                               
610         outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
611         ofstream outTree;
612         m->openOutputFile(conFile, outTree);
613         
614         if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
615         outTree.close();
616         
617         return 0;
618
619     }
620         catch(exception& e) {
621                 m->errorOut(e, "UnifracWeightedCommand", "getConsensusTrees");
622                 exit(1);
623         }
624 }
625 /**************************************************************************************************/
626
627 vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
628         try {
629         
630         vector<Tree*> trees;
631         
632         //create a new filename
633         string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.all." + getOutputFileNameTag("tree");                             
634         outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
635         
636         ofstream outAll;
637         m->openOutputFile(outputFile, outAll);
638         
639
640         for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
641             
642             if (m->control_pressed) { break; }
643             
644             //make matrix with scores in it
645             vector< vector<double> > sims;      sims.resize(m->getNumGroups());
646             for (int j = 0; j < m->getNumGroups(); j++) {
647                 sims[j].resize(m->getNumGroups(), 0.0);
648             }
649             
650             int count = 0;
651                         for (int r=0; r<m->getNumGroups(); r++) { 
652                                 for (int l = 0; l < r; l++) {
653                     double sim = -(dists[i][count]-1.0);
654                                         sims[r][l] = sim;
655                                         sims[l][r] = sim;
656                                         count++;
657                                 }
658                         }
659
660             //create tree
661             Tree* tempTree = new Tree(&myct, sims);
662             tempTree->assembleTree();
663             
664             trees.push_back(tempTree);
665             
666             //print tree
667             tempTree->print(outAll);
668         }
669         
670         outAll.close();
671         
672         if (m->control_pressed) {  for (int i = 0; i < trees.size(); i++) {  delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
673         
674         return trees;
675     }
676         catch(exception& e) {
677                 m->errorOut(e, "UnifracWeightedCommand", "buildTrees");
678                 exit(1);
679         }
680 }
681 /**************************************************************************************************/
682
683 int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
684         try {
685         
686         //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
687         vector< vector<string> > namesOfGroupCombos;
688         for (int a=0; a<numGroups; a++) { 
689             for (int l = 0; l < a; l++) {       
690                 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
691                 namesOfGroupCombos.push_back(groups);
692             }
693         }
694         
695         lines.clear();
696         
697 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
698         if(processors != 1){
699             int numPairs = namesOfGroupCombos.size();
700             int numPairsPerProcessor = numPairs / processors;
701             
702             for (int i = 0; i < processors; i++) {
703                 int startPos = i * numPairsPerProcessor;
704                 if(i == processors - 1){
705                     numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
706                 }
707                 lines.push_back(linePair(startPos, numPairsPerProcessor));
708             }
709         }
710 #endif
711         
712         
713         //get scores for random trees
714         for (int j = 0; j < iters; j++) {
715             cout << j << endl; 
716 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
717             if(processors == 1){
718                 driver(thisTree,  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
719             }else{
720                 createProcesses(thisTree,  namesOfGroupCombos, rScores);
721             }
722 #else
723             driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
724 #endif
725             
726             if (m->control_pressed) { delete ct;  for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]);  } return 0; }
727             
728             //report progress
729             //                                  m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();          
730         }
731         lines.clear();
732         
733         //find the signifigance of the score for summary file
734         for (int f = 0; f < numComp; f++) {
735             //sort random scores
736             sort(rScores[f].begin(), rScores[f].end());
737             
738             //the index of the score higher than yours is returned 
739             //so if you have 1000 random trees the index returned is 100 
740             //then there are 900 trees with a score greater then you. 
741             //giving you a signifigance of 0.900
742             int index = findIndex(usersScores[f], f);    if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
743             
744             //the signifigance is the number of trees with the users score or higher 
745             WScoreSig.push_back((iters-index)/(float)iters);
746         }
747         
748         //out << "Tree# " << i << endl;
749         calculateFreqsCumuls();
750         printWeightedFile();
751         
752         delete output;
753         
754         return 0;
755     }
756         catch(exception& e) {
757                 m->errorOut(e, "UnifracWeightedCommand", "runRandomCalcs");
758                 exit(1);
759         }
760 }
761 /**************************************************************************************************/
762
763 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
764         try {
765 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
766                 int process = 1;
767                 vector<int> processIDS;
768                 
769                 EstOutput results;
770                 
771                 //loop through and create all the processes you want
772                 while (process != processors) {
773                         int pid = fork();
774                         
775                         if (pid > 0) {
776                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
777                                 process++;
778                         }else if (pid == 0){
779                                 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
780                         
781                                 //pass numSeqs to parent
782                                 ofstream out;
783                                 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
784                                 m->openOutputFile(tempFile, out);
785                                 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t';  } out << endl;
786                                 out.close();
787                                 
788                                 exit(0);
789                         }else { 
790                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
791                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
792                                 exit(0);
793                         }
794                 }
795                 
796                 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
797                 
798                 //force parent to wait until all the processes are done
799                 for (int i=0;i<(processors-1);i++) { 
800                         int temp = processIDS[i];
801                         wait(&temp);
802                 }
803                 
804                 //get data created by processes
805                 for (int i=0;i<(processors-1);i++) { 
806         
807                         ifstream in;
808                         string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
809                         m->openInputFile(s, in);
810                         
811                         double tempScore;
812                         for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
813                         in.close();
814                         m->mothurRemove(s);
815                 }
816                 
817                 return 0;
818 #endif          
819         }
820         catch(exception& e) {
821                 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
822                 exit(1);
823         }
824 }
825
826 /**************************************************************************************************/
827 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) { 
828  try {
829                 Tree* randT = new Tree(ct);
830      
831         Weighted weighted(includeRoot);
832      
833                 for (int h = start; h < (start+num); h++) {
834         
835                         if (m->control_pressed) { return 0; }
836                 
837                         //initialize weighted score
838                         string groupA = namesOfGroupCombos[h][0]; 
839                         string groupB = namesOfGroupCombos[h][1];
840                         
841                         //copy T[i]'s info.
842                         randT->getCopy(t);
843                          
844                         //create a random tree with same topology as T[i], but different labels
845                         randT->assembleRandomUnifracTree(groupA, groupB);
846                         
847                         if (m->control_pressed) { delete randT;  return 0;  }
848
849                         //get wscore of random tree
850                         EstOutput randomData = weighted.getValues(randT, groupA, groupB);
851                 
852                         if (m->control_pressed) { delete randT;  return 0;  }
853                                                                                 
854                         //save scores
855                         scores[h].push_back(randomData[0]);
856                 }
857         
858                 delete randT;
859         
860                 return 0;
861
862         }
863         catch(exception& e) {
864                 m->errorOut(e, "UnifracWeightedCommand", "driver");
865                 exit(1);
866         }
867 }
868 /***********************************************************/
869 void UnifracWeightedCommand::printWeightedFile() {
870         try {
871                 vector<double> data;
872                 vector<string> tags;
873                 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
874                 
875                 for(int a = 0; a < numComp; a++) {
876                         output->initFile(groupComb[a], tags);
877                         //print each line
878                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
879                                 data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
880                                 output->output(data);
881                                 data.clear();
882                         } 
883                         output->resetFile();
884                 }
885         }
886         catch(exception& e) {
887                 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
888                 exit(1);
889         }
890 }
891
892
893 /***********************************************************/
894 void UnifracWeightedCommand::printWSummaryFile() {
895         try {
896                 //column headers
897                 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
898                 m->mothurOut("Tree#\tGroups\tWScore\t");
899                 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
900                 outSum << endl; m->mothurOutEndLine();
901                 
902                 //format output
903                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
904                 
905                 //print each line
906                 int count = 0;
907                 for (int i = 0; i < T.size(); i++) { 
908                         for (int j = 0; j < numComp; j++) {
909                                 if (random) {
910                                         if (WScoreSig[count] > (1/(float)iters)) {
911                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
912                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
913                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" +  toString(WScoreSig[count]) + "\n");   
914                                         }else{
915                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
916                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
917                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" +  toString((1/float(iters))) + "\n");  
918                                         }
919                                 }else{
920                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl; 
921                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count]  << endl; 
922                                         m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n"); 
923                                 }
924                                 count++;
925                         }
926                 }
927                 outSum.close();
928         }
929         catch(exception& e) {
930                 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
931                 exit(1);
932         }
933 }
934 /***********************************************************/
935 void UnifracWeightedCommand::createPhylipFile() {
936         try {
937                 int count = 0;
938                 //for each tree
939                 for (int i = 0; i < T.size(); i++) { 
940                 
941                         string phylipFileName;
942                         if ((outputForm == "lt") || (outputForm == "square")) {
943                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.phylip." + getOutputFileNameTag("phylip");
944                                 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
945                         }else { //column
946                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.column." + getOutputFileNameTag("column");
947                                 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
948                         }
949                         
950                         ofstream out;
951                         m->openOutputFile(phylipFileName, out);
952                         
953                         if ((outputForm == "lt") || (outputForm == "square")) {
954                                 //output numSeqs
955                                 out << m->getNumGroups() << endl;
956                         }
957
958                         //make matrix with scores in it
959                         vector< vector<float> > dists;  dists.resize(m->getNumGroups());
960                         for (int i = 0; i < m->getNumGroups(); i++) {
961                                 dists[i].resize(m->getNumGroups(), 0.0);
962                         }
963                         
964                         //flip it so you can print it
965                         for (int r=0; r<m->getNumGroups(); r++) { 
966                                 for (int l = 0; l < r; l++) {
967                                         dists[r][l] = utreeScores[count];
968                                         dists[l][r] = utreeScores[count];
969                                         count++;
970                                 }
971                         }
972
973                         //output to file
974                         for (int r=0; r<m->getNumGroups(); r++) { 
975                                 //output name
976                                 string name = (m->getGroups())[r];
977                                 if (name.length() < 10) { //pad with spaces to make compatible
978                                         while (name.length() < 10) {  name += " ";  }
979                                 }
980                                 
981                                 if (outputForm == "lt") {
982                                         out << name << '\t';
983                                         
984                                         //output distances
985                                         for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
986                                         out << endl;
987                                 }else if (outputForm == "square") {
988                                         out << name << '\t';
989                                         
990                                         //output distances
991                                         for (int l = 0; l < m->getNumGroups(); l++) {   out  << dists[r][l] << '\t';  }
992                                         out << endl;
993                                 }else{
994                                         //output distances
995                                         for (int l = 0; l < r; l++) {   
996                                                 string otherName = (m->getGroups())[l];
997                                                 if (otherName.length() < 10) { //pad with spaces to make compatible
998                                                         while (otherName.length() < 10) {  otherName += " ";  }
999                                                 }
1000                                                 
1001                                                 out  << name << '\t' << otherName << dists[r][l] << endl;  
1002                                         }
1003                                 }
1004                         }
1005                         out.close();
1006                 }
1007         }
1008         catch(exception& e) {
1009                 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
1010                 exit(1);
1011         }
1012 }
1013 /***********************************************************/
1014 int UnifracWeightedCommand::findIndex(float score, int index) {
1015         try{
1016                 for (int i = 0; i < rScores[index].size(); i++) {
1017                         if (rScores[index][i] >= score) {       return i;       }
1018                 }
1019                 return rScores[index].size();
1020         }
1021         catch(exception& e) {
1022                 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
1023                 exit(1);
1024         }
1025 }
1026
1027 /***********************************************************/
1028
1029 void UnifracWeightedCommand::calculateFreqsCumuls() {
1030         try {
1031                 //clear out old tree values
1032                 rScoreFreq.clear();
1033                 rScoreFreq.resize(numComp);
1034                 rCumul.clear();
1035                 rCumul.resize(numComp);
1036                 validScores.clear();
1037         
1038                 //calculate frequency
1039                 for (int f = 0; f < numComp; f++) {
1040                         for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
1041                                 validScores[rScores[f][i]] = rScores[f][i];
1042                                 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
1043                                 if (it != rScoreFreq[f].end()) {
1044                                         rScoreFreq[f][rScores[f][i]]++;
1045                                 }else{
1046                                         rScoreFreq[f][rScores[f][i]] = 1;
1047                                 }
1048                         }
1049                 }
1050                 
1051                 //calculate rcumul
1052                 for(int a = 0; a < numComp; a++) {
1053                         float rcumul = 1.0000;
1054                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
1055                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
1056                                 //make rscoreFreq map and rCumul
1057                                 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
1058                                 rCumul[a][it->first] = rcumul;
1059                                 //get percentage of random trees with that info
1060                                 if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
1061                                 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
1062                         }
1063                 }
1064
1065         }
1066         catch(exception& e) {
1067                 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
1068                 exit(1);
1069         }
1070 }
1071 /***********************************************************/
1072
1073
1074
1075
1076