]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
classify.seqs allows sequences to be in taxonomy file that are not in template. ...
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11 #include "consensus.h"
12 #include "subsample.h"
13 #include "treereader.h"
14
15 //**********************************************************************************************************************
16 vector<string> UnifracWeightedCommand::setParameters(){ 
17         try {
18                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
20         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
22                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
23                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
26         CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
27         CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
28                 CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(pdistance);
29                 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
30                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
31                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
32                 
33                 vector<string> myArray;
34                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
35                 return myArray;
36         }
37         catch(exception& e) {
38                 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
39                 exit(1);
40         }
41 }
42 //**********************************************************************************************************************
43 string UnifracWeightedCommand::getHelpString(){ 
44         try {
45                 string helpString = "";
46                 helpString += "The unifrac.weighted command parameters are tree, group, name, count, groups, iters, distance, processors, root, subsample, consensus and random.  tree parameter is required unless you have valid current tree file.\n";
47                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
48                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
49                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
50                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
51                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
52                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
53         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
54         helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results, as well as a consensus tree built from these trees. Default=F.\n";
55         helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
56                 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
57                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
58                 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
59                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
60                 return helpString;
61         }
62         catch(exception& e) {
63                 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
64                 exit(1);
65         }
66 }
67 //**********************************************************************************************************************
68 string UnifracWeightedCommand::getOutputFileNameTag(string type, string inputName=""){  
69         try {
70         string outputFileName = "";
71                 map<string, vector<string> >::iterator it;
72         
73         //is this a type this command creates
74         it = outputTypes.find(type);
75         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
76         else {
77             if (type == "weighted")            {   outputFileName =  "weighted";   }
78             else if (type == "wsummary")        {   outputFileName =  "wsummary";   }
79             else if (type == "phylip")           {   outputFileName =  "dist";   }
80             else if (type == "column")           {   outputFileName =  "dist";   }
81             else if (type == "tree")             {   outputFileName =  "tre";   }
82             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
83         }
84         return outputFileName;
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "UnifracWeightedCommand", "getOutputFileNameTag");
88                 exit(1);
89         }
90 }
91 //**********************************************************************************************************************
92 UnifracWeightedCommand::UnifracWeightedCommand(){       
93         try {
94                 abort = true; calledHelp = true; 
95                 setParameters();
96                 vector<string> tempOutNames;
97                 outputTypes["weighted"] = tempOutNames;
98                 outputTypes["wsummary"] = tempOutNames;
99                 outputTypes["phylip"] = tempOutNames;
100                 outputTypes["column"] = tempOutNames;
101         outputTypes["tree"] = tempOutNames;
102         }
103         catch(exception& e) {
104                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
105                 exit(1);
106         }
107 }
108
109 /***********************************************************/
110 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
111         try {
112                 abort = false; calledHelp = false;   
113                         
114                 //allow user to run help
115                 if(option == "help") { help(); abort = true; calledHelp = true; }
116                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
117                 
118                 else {
119                         vector<string> myArray = setParameters();
120                         
121                         OptionParser parser(option);
122                         map<string,string> parameters=parser.getParameters();
123                         map<string,string>::iterator it;
124                         
125                         ValidParameters validParameter;
126                 
127                         //check to make sure all parameters are valid for command
128                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
129                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
130                         }
131                         
132                         //initialize outputTypes
133                         vector<string> tempOutNames;
134                         outputTypes["weighted"] = tempOutNames;
135                         outputTypes["wsummary"] = tempOutNames;
136                         outputTypes["phylip"] = tempOutNames;
137                         outputTypes["column"] = tempOutNames;
138             outputTypes["tree"] = tempOutNames;
139                         
140                         //if the user changes the input directory command factory will send this info to us in the output parameter 
141                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
142                         if (inputDir == "not found"){   inputDir = "";          }
143                         else {
144                                 string path;
145                                 it = parameters.find("tree");
146                                 //user has given a template file
147                                 if(it != parameters.end()){ 
148                                         path = m->hasPath(it->second);
149                                         //if the user has not given a path then, add inputdir. else leave path alone.
150                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
151                                 }
152                                 
153                                 it = parameters.find("group");
154                                 //user has given a template file
155                                 if(it != parameters.end()){ 
156                                         path = m->hasPath(it->second);
157                                         //if the user has not given a path then, add inputdir. else leave path alone.
158                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
159                                 }
160                                 
161                                 it = parameters.find("name");
162                                 //user has given a template file
163                                 if(it != parameters.end()){ 
164                                         path = m->hasPath(it->second);
165                                         //if the user has not given a path then, add inputdir. else leave path alone.
166                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
167                                 }
168                 
169                 it = parameters.find("count");
170                                 //user has given a template file
171                                 if(it != parameters.end()){ 
172                                         path = m->hasPath(it->second);
173                                         //if the user has not given a path then, add inputdir. else leave path alone.
174                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
175                                 }
176                         }
177                         
178                         //check for required parameters
179                         treefile = validParameter.validFile(parameters, "tree", true);
180                         if (treefile == "not open") { treefile = ""; abort = true; }
181                         else if (treefile == "not found") {                             //if there is a current design file, use it
182                                 treefile = m->getTreeFile(); 
183                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
184                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
185                         }else { m->setTreeFile(treefile); }     
186                         
187                         //check for required parameters
188                         groupfile = validParameter.validFile(parameters, "group", true);
189                         if (groupfile == "not open") { abort = true; }
190                         else if (groupfile == "not found") { groupfile = ""; }
191                         else { m->setGroupFile(groupfile); }
192                         
193                         namefile = validParameter.validFile(parameters, "name", true);
194                         if (namefile == "not open") { namefile = ""; abort = true; }
195                         else if (namefile == "not found") { namefile = ""; }
196                         else { m->setNameFile(namefile); }
197                         
198             countfile = validParameter.validFile(parameters, "count", true);
199                         if (countfile == "not open") { countfile = ""; abort = true; }
200                         else if (countfile == "not found") { countfile = "";  } 
201                         else { m->setCountTableFile(countfile); }
202             
203             if ((namefile != "") && (countfile != "")) {
204                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
205             }
206                         
207             if ((groupfile != "") && (countfile != "")) {
208                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
209             }
210
211                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
212                         
213                                                                                                                                         
214                         //check for optional parameter and set defaults
215                         // ...at some point should added some additional type checking...
216                         groups = validParameter.validFile(parameters, "groups", false);                 
217                         if (groups == "not found") { groups = ""; }
218                         else { 
219                                 m->splitAtDash(groups, Groups);
220                                 m->setGroups(Groups);
221                         }
222                                 
223                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
224                         m->mothurConvert(itersString, iters); 
225                         
226                         string temp = validParameter.validFile(parameters, "distance", false);                  
227                         if (temp == "not found") { phylip = false; outputForm = ""; }
228                         else{
229                 if (temp=="phylip") { temp = "lt"; }
230                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
231                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
232                         }
233                         
234                         temp = validParameter.validFile(parameters, "random", false);                           if (temp == "not found") { temp = "F"; }
235                         random = m->isTrue(temp);
236                         
237                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
238                         includeRoot = m->isTrue(temp);
239                         
240                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
241                         m->setProcessors(temp);
242                         m->mothurConvert(temp, processors);
243             
244             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
245                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
246             else {  
247                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
248                 else { subsample = false; }
249             }
250                         
251             if (!subsample) { subsampleIters = 0;   }
252             else { subsampleIters = iters;          }
253             
254             temp = validParameter.validFile(parameters, "consensus", false);                                    if (temp == "not found") { temp = "F"; }
255                         consensus = m->isTrue(temp);
256             
257                         if (subsample && random) {  m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true;  } 
258                         if (countfile == "") { if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } }
259             else {  
260                 CountTable testCt; 
261                 if ((!testCt.testGroups(countfile)) && (subsample)) {
262                     m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;  
263                 }
264             }
265             if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
266             if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
267             
268                         if (countfile=="") {
269                 if (namefile == "") {
270                     vector<string> files; files.push_back(treefile);
271                     parser.getNameFile(files);
272                 } 
273             }
274                 }
275                 
276                 
277         }
278         catch(exception& e) {
279                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
280                 exit(1);
281         }
282 }
283 /***********************************************************/
284 int UnifracWeightedCommand::execute() {
285         try {
286         
287                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
288                 
289                 m->setTreeFile(treefile);
290                 
291         TreeReader* reader;
292         if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
293         else { reader = new TreeReader(treefile, countfile); }
294         T = reader->getTrees();
295         ct = T[0]->getCountTable();
296         delete reader;
297         
298         if (m->control_pressed) {  delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
299                                 
300                 sumFile = outputDir + m->getSimpleName(treefile) + getOutputFileNameTag("wsummary");
301                 m->openOutputFile(sumFile, outSum);
302                 outputNames.push_back(sumFile);  outputTypes["wsummary"].push_back(sumFile);
303                 
304         SharedUtil util;
305                 string s; //to make work with setgroups
306                 Groups = m->getGroups();
307                 vector<string> nameGroups = ct->getNamesOfGroups();
308         if (nameGroups.size() < 2) { m->mothurOut("[ERROR]: You cannot run unifrac.weighted with less than 2 groups, aborting.\n"); delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
309                 util.setGroups(Groups, nameGroups, s, numGroups, "weighted");   //sets the groups the user wants to analyze
310                 m->setGroups(Groups);
311                 
312         if (m->control_pressed) {  delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
313         
314                 Weighted weighted(includeRoot);
315                         
316                 int start = time(NULL);
317             
318         //set or check size
319         if (subsample) {
320             //user has not set size, set size = smallest samples size
321             if (subsampleSize == -1) { 
322                 vector<string> temp; temp.push_back(Groups[0]);
323                 subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
324                 for (int i = 1; i < Groups.size(); i++) {
325                     int thisSize = ct->getGroupCount(Groups[i]);
326                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
327                 }
328                 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
329             }else { //eliminate any too small groups
330                 vector<string> newGroups = Groups;
331                 Groups.clear();
332                 for (int i = 0; i < newGroups.size(); i++) {
333                     int thisSize = ct->getGroupCount(newGroups[i]);
334                     
335                     if (thisSize >= subsampleSize) {    Groups.push_back(newGroups[i]); }
336                     else {   m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
337                 } 
338                 m->setGroups(Groups);
339             }
340         }
341         
342         //here in case some groups are removed by subsample
343         util.getCombos(groupComb, Groups, numComp);
344         
345         if (numComp < processors) { processors = numComp; }
346         
347         if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
348         
349         //get weighted scores for users trees
350         for (int i = 0; i < T.size(); i++) {
351             
352             if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]);  } return 0; }
353             
354             counter = 0;
355             rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
356             uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
357             
358             vector<double> userData; userData.resize(numComp,0);  //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
359             vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
360             
361             if (random) {  
362                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("weighted"), itersString);  
363                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("weighted"));
364                 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("weighted"));
365             } 
366             
367             userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
368             if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0; }
369             
370             //save users score
371             for (int s=0; s<numComp; s++) {
372                 //add users score to vector of user scores
373                 uScores[s].push_back(userData[s]);
374                 //save users tree score for summary file
375                 utreeScores.push_back(userData[s]);
376             }
377             
378             if (random) {  runRandomCalcs(T[i], userData); }
379             
380             //clear data
381             rScores.clear();
382             uScores.clear();
383             validScores.clear();
384             
385             //subsample loop
386             vector< vector<double> > calcDistsTotals;  //each iter, each groupCombos dists. this will be used to make .dist files
387             for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
388                 if (m->control_pressed) { break; }
389                 
390                 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
391                 CountTable* newCt = new CountTable();
392                 
393                 int sampleTime = 0;
394                 if (m->debug) { sampleTime = time(NULL); }
395                 
396                 //uses method of setting groups to doNotIncludeMe
397                 SubSample sample;
398                 Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
399                 
400                 if (m->debug) { m->mothurOut("[DEBUG]: iter " + toString(thisIter) + " took " + toString(time(NULL) - sampleTime) + " seconds to sample tree.\n"); }
401                 
402                 //call new weighted function
403                 vector<double> iterData; iterData.resize(numComp,0);
404                 Weighted thisWeighted(includeRoot);
405                 iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
406                 
407                 //save data to make ave dist, std dist
408                 calcDistsTotals.push_back(iterData);
409                 
410                 delete newCt;
411                 delete subSampleTree;
412                 
413                 if((thisIter+1) % 100 == 0){    m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine();              }
414             }
415             
416             if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0; }
417             
418             if (subsample) {  getAverageSTDMatrices(calcDistsTotals, i); }
419             if (consensus) {  getConsensusTrees(calcDistsTotals, i);  }
420         }
421         
422                 
423                 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);  } return 0;  }
424                 
425         if (phylip) {   createPhylipFile();             }
426     
427                 printWSummaryFile();
428                 
429                 //clear out users groups
430                 m->clearGroups();
431                 delete ct; 
432                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
433                 
434                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
435                 
436                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
437                 
438                 //set phylip file as new current phylipfile
439                 string current = "";
440                 itTypes = outputTypes.find("phylip");
441                 if (itTypes != outputTypes.end()) {
442                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
443                 }
444                 
445                 //set column file as new current columnfile
446                 itTypes = outputTypes.find("column");
447                 if (itTypes != outputTypes.end()) {
448                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
449                 }
450                 
451                 m->mothurOutEndLine();
452                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
453                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
454                 m->mothurOutEndLine();
455                 
456                 return 0;
457                 
458         }
459         catch(exception& e) {
460                 m->errorOut(e, "UnifracWeightedCommand", "execute");
461                 exit(1);
462         }
463 }
464 /**************************************************************************************************/
465 int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
466         try {
467         //we need to find the average distance and standard deviation for each groups distance
468         
469         //finds sum
470         vector<double> averages; averages.resize(numComp, 0); 
471         for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
472             for (int i = 0; i < dists[thisIter].size(); i++) {  
473                 averages[i] += dists[thisIter][i];
474             }
475         }
476         
477         //finds average.
478         for (int i = 0; i < averages.size(); i++) {  averages[i] /= (float) subsampleIters; }
479         
480         //find standard deviation
481         vector<double> stdDev; stdDev.resize(numComp, 0);
482                 
483         for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
484             for (int j = 0; j < dists[thisIter].size(); j++) {
485                 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
486             }
487         }
488         for (int i = 0; i < stdDev.size(); i++) {  
489             stdDev[i] /= (float) subsampleIters; 
490             stdDev[i] = sqrt(stdDev[i]);
491         }
492         
493         //make matrix with scores in it
494         vector< vector<double> > avedists;      avedists.resize(m->getNumGroups());
495         for (int i = 0; i < m->getNumGroups(); i++) {
496             avedists[i].resize(m->getNumGroups(), 0.0);
497         }
498         
499         //make matrix with scores in it
500         vector< vector<double> > stddists;      stddists.resize(m->getNumGroups());
501         for (int i = 0; i < m->getNumGroups(); i++) {
502             stddists[i].resize(m->getNumGroups(), 0.0);
503         }
504         
505         //flip it so you can print it
506         int count = 0;
507         for (int r=0; r<m->getNumGroups(); r++) { 
508             for (int l = 0; l < r; l++) {
509                 avedists[r][l] = averages[count];
510                 avedists[l][r] = averages[count];
511                 stddists[r][l] = stdDev[count];
512                 stddists[l][r] = stdDev[count];
513                 count++;
514             }
515         }
516         
517         string aveFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".weighted.ave." + getOutputFileNameTag("phylip");
518         if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);  }
519         else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName);  }
520         ofstream out;
521         m->openOutputFile(aveFileName, out);
522         
523         string stdFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".weighted.std." + getOutputFileNameTag("phylip");
524         if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
525         else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }        
526         ofstream outStd;
527         m->openOutputFile(stdFileName, outStd);
528         
529         if ((outputForm == "lt") || (outputForm == "square")) {
530             //output numSeqs
531             out << m->getNumGroups() << endl;
532             outStd << m->getNumGroups() << endl;
533         }
534         
535         //output to file
536         for (int r=0; r<m->getNumGroups(); r++) { 
537             //output name
538             string name = (m->getGroups())[r];
539             if (name.length() < 10) { //pad with spaces to make compatible
540                 while (name.length() < 10) {  name += " ";  }
541             }
542             
543             if (outputForm == "lt") {
544                 out << name << '\t';
545                 outStd << name << '\t';
546                 
547                 //output distances
548                 for (int l = 0; l < r; l++) {   out  << avedists[r][l] << '\t';  outStd  << stddists[r][l] << '\t';}
549                 out << endl;  outStd << endl;
550             }else if (outputForm == "square") {
551                 out << name << '\t';
552                 outStd << name << '\t';
553                 
554                 //output distances
555                 for (int l = 0; l < m->getNumGroups(); l++) {   out  << avedists[r][l] << '\t'; outStd  << stddists[r][l] << '\t'; }
556                 out << endl; outStd << endl;
557             }else{
558                 //output distances
559                 for (int l = 0; l < r; l++) {   
560                     string otherName = (m->getGroups())[l];
561                     if (otherName.length() < 10) { //pad with spaces to make compatible
562                         while (otherName.length() < 10) {  otherName += " ";  }
563                     }
564                     
565                     out  << name << '\t' << otherName << avedists[r][l] << endl;  
566                     outStd  << name << '\t' << otherName << stddists[r][l] << endl; 
567                 }
568             }
569         }
570         out.close();
571         outStd.close();
572         
573         return 0;
574     }
575         catch(exception& e) {
576                 m->errorOut(e, "UnifracWeightedCommand", "getAverageSTDMatrices");
577                 exit(1);
578         }
579 }
580
581 /**************************************************************************************************/
582 int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
583         try {
584         
585         //used in tree constructor 
586         m->runParse = false;
587         
588         ///create treemap class from groupmap for tree class to use
589         CountTable newCt;
590         set<string> nameMap;
591         map<string, string> groupMap;
592         set<string> gps;
593         for (int i = 0; i < m->getGroups().size(); i++) { 
594             nameMap.insert(m->getGroups()[i]); 
595             gps.insert(m->getGroups()[i]); 
596             groupMap[m->getGroups()[i]] = m->getGroups()[i];
597         }
598         newCt.createTable(nameMap, groupMap, gps);
599         
600         //clear  old tree names if any
601         m->Treenames.clear();
602         
603         //fills globaldatas tree names
604         m->Treenames = m->getGroups();
605         
606         vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
607         
608         if (m->control_pressed) { return 0; }
609         
610         Consensus con;
611         Tree* conTree = con.getTree(newTrees);
612         
613         //create a new filename
614         string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons." + getOutputFileNameTag("tree");                               
615         outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
616         ofstream outTree;
617         m->openOutputFile(conFile, outTree);
618         
619         if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
620         outTree.close();
621         
622         return 0;
623
624     }
625         catch(exception& e) {
626                 m->errorOut(e, "UnifracWeightedCommand", "getConsensusTrees");
627                 exit(1);
628         }
629 }
630 /**************************************************************************************************/
631
632 vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
633         try {
634         
635         vector<Tree*> trees;
636         
637         //create a new filename
638         string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.all." + getOutputFileNameTag("tree");                             
639         outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
640         
641         ofstream outAll;
642         m->openOutputFile(outputFile, outAll);
643         
644
645         for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
646             
647             if (m->control_pressed) { break; }
648             
649             //make matrix with scores in it
650             vector< vector<double> > sims;      sims.resize(m->getNumGroups());
651             for (int j = 0; j < m->getNumGroups(); j++) {
652                 sims[j].resize(m->getNumGroups(), 0.0);
653             }
654             
655             int count = 0;
656                         for (int r=0; r<m->getNumGroups(); r++) { 
657                                 for (int l = 0; l < r; l++) {
658                     double sim = -(dists[i][count]-1.0);
659                                         sims[r][l] = sim;
660                                         sims[l][r] = sim;
661                                         count++;
662                                 }
663                         }
664
665             //create tree
666             Tree* tempTree = new Tree(&myct, sims);
667             tempTree->assembleTree();
668             
669             trees.push_back(tempTree);
670             
671             //print tree
672             tempTree->print(outAll);
673         }
674         
675         outAll.close();
676         
677         if (m->control_pressed) {  for (int i = 0; i < trees.size(); i++) {  delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
678         
679         return trees;
680     }
681         catch(exception& e) {
682                 m->errorOut(e, "UnifracWeightedCommand", "buildTrees");
683                 exit(1);
684         }
685 }
686 /**************************************************************************************************/
687
688 int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
689         try {
690         
691         //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
692         vector< vector<string> > namesOfGroupCombos;
693         for (int a=0; a<numGroups; a++) { 
694             for (int l = 0; l < a; l++) {       
695                 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
696                 namesOfGroupCombos.push_back(groups);
697             }
698         }
699         
700         lines.clear();
701         
702 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
703         if(processors != 1){
704             int numPairs = namesOfGroupCombos.size();
705             int numPairsPerProcessor = numPairs / processors;
706             
707             for (int i = 0; i < processors; i++) {
708                 int startPos = i * numPairsPerProcessor;
709                 if(i == processors - 1){
710                     numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
711                 }
712                 lines.push_back(linePair(startPos, numPairsPerProcessor));
713             }
714         }
715 #endif
716         
717         
718         //get scores for random trees
719         for (int j = 0; j < iters; j++) {
720             cout << j << endl; 
721 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
722             if(processors == 1){
723                 driver(thisTree,  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
724             }else{
725                 createProcesses(thisTree,  namesOfGroupCombos, rScores);
726             }
727 #else
728             driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
729 #endif
730             
731             if (m->control_pressed) { delete ct;  for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]);  } return 0; }
732             
733             //report progress
734             //                                  m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();          
735         }
736         lines.clear();
737         
738         //find the signifigance of the score for summary file
739         for (int f = 0; f < numComp; f++) {
740             //sort random scores
741             sort(rScores[f].begin(), rScores[f].end());
742             
743             //the index of the score higher than yours is returned 
744             //so if you have 1000 random trees the index returned is 100 
745             //then there are 900 trees with a score greater then you. 
746             //giving you a signifigance of 0.900
747             int index = findIndex(usersScores[f], f);    if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
748             
749             //the signifigance is the number of trees with the users score or higher 
750             WScoreSig.push_back((iters-index)/(float)iters);
751         }
752         
753         //out << "Tree# " << i << endl;
754         calculateFreqsCumuls();
755         printWeightedFile();
756         
757         delete output;
758         
759         return 0;
760     }
761         catch(exception& e) {
762                 m->errorOut(e, "UnifracWeightedCommand", "runRandomCalcs");
763                 exit(1);
764         }
765 }
766 /**************************************************************************************************/
767
768 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
769         try {
770 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
771                 int process = 1;
772                 vector<int> processIDS;
773                 
774                 EstOutput results;
775                 
776                 //loop through and create all the processes you want
777                 while (process != processors) {
778                         int pid = fork();
779                         
780                         if (pid > 0) {
781                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
782                                 process++;
783                         }else if (pid == 0){
784                                 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
785                         
786                                 //pass numSeqs to parent
787                                 ofstream out;
788                                 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
789                                 m->openOutputFile(tempFile, out);
790                                 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t';  } out << endl;
791                                 out.close();
792                                 
793                                 exit(0);
794                         }else { 
795                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
796                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
797                                 exit(0);
798                         }
799                 }
800                 
801                 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
802                 
803                 //force parent to wait until all the processes are done
804                 for (int i=0;i<(processors-1);i++) { 
805                         int temp = processIDS[i];
806                         wait(&temp);
807                 }
808                 
809                 //get data created by processes
810                 for (int i=0;i<(processors-1);i++) { 
811         
812                         ifstream in;
813                         string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
814                         m->openInputFile(s, in);
815                         
816                         double tempScore;
817                         for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
818                         in.close();
819                         m->mothurRemove(s);
820                 }
821                 
822                 return 0;
823 #endif          
824         }
825         catch(exception& e) {
826                 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
827                 exit(1);
828         }
829 }
830
831 /**************************************************************************************************/
832 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) { 
833  try {
834                 Tree* randT = new Tree(ct);
835      
836         Weighted weighted(includeRoot);
837      
838                 for (int h = start; h < (start+num); h++) {
839         
840                         if (m->control_pressed) { return 0; }
841                 
842                         //initialize weighted score
843                         string groupA = namesOfGroupCombos[h][0]; 
844                         string groupB = namesOfGroupCombos[h][1];
845                         
846                         //copy T[i]'s info.
847                         randT->getCopy(t);
848                          
849                         //create a random tree with same topology as T[i], but different labels
850                         randT->assembleRandomUnifracTree(groupA, groupB);
851                         
852                         if (m->control_pressed) { delete randT;  return 0;  }
853
854                         //get wscore of random tree
855                         EstOutput randomData = weighted.getValues(randT, groupA, groupB);
856                 
857                         if (m->control_pressed) { delete randT;  return 0;  }
858                                                                                 
859                         //save scores
860                         scores[h].push_back(randomData[0]);
861                 }
862         
863                 delete randT;
864         
865                 return 0;
866
867         }
868         catch(exception& e) {
869                 m->errorOut(e, "UnifracWeightedCommand", "driver");
870                 exit(1);
871         }
872 }
873 /***********************************************************/
874 void UnifracWeightedCommand::printWeightedFile() {
875         try {
876                 vector<double> data;
877                 vector<string> tags;
878                 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
879                 
880                 for(int a = 0; a < numComp; a++) {
881                         output->initFile(groupComb[a], tags);
882                         //print each line
883                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
884                                 data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
885                                 output->output(data);
886                                 data.clear();
887                         } 
888                         output->resetFile();
889                 }
890         }
891         catch(exception& e) {
892                 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
893                 exit(1);
894         }
895 }
896
897
898 /***********************************************************/
899 void UnifracWeightedCommand::printWSummaryFile() {
900         try {
901                 //column headers
902                 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
903                 m->mothurOut("Tree#\tGroups\tWScore\t");
904                 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
905                 outSum << endl; m->mothurOutEndLine();
906                 
907                 //format output
908                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
909                 
910                 //print each line
911                 int count = 0;
912                 for (int i = 0; i < T.size(); i++) { 
913                         for (int j = 0; j < numComp; j++) {
914                                 if (random) {
915                                         if (WScoreSig[count] > (1/(float)iters)) {
916                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
917                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
918                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" +  toString(WScoreSig[count]) + "\n");   
919                                         }else{
920                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
921                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
922                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" +  toString((1/float(iters))) + "\n");  
923                                         }
924                                 }else{
925                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl; 
926                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count]  << endl; 
927                                         m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n"); 
928                                 }
929                                 count++;
930                         }
931                 }
932                 outSum.close();
933         }
934         catch(exception& e) {
935                 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
936                 exit(1);
937         }
938 }
939 /***********************************************************/
940 void UnifracWeightedCommand::createPhylipFile() {
941         try {
942                 int count = 0;
943                 //for each tree
944                 for (int i = 0; i < T.size(); i++) { 
945                 
946                         string phylipFileName;
947                         if ((outputForm == "lt") || (outputForm == "square")) {
948                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.phylip." + getOutputFileNameTag("phylip");
949                                 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
950                         }else { //column
951                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.column." + getOutputFileNameTag("column");
952                                 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
953                         }
954                         
955                         ofstream out;
956                         m->openOutputFile(phylipFileName, out);
957                         
958                         if ((outputForm == "lt") || (outputForm == "square")) {
959                                 //output numSeqs
960                                 out << m->getNumGroups() << endl;
961                         }
962
963                         //make matrix with scores in it
964                         vector< vector<float> > dists;  dists.resize(m->getNumGroups());
965                         for (int i = 0; i < m->getNumGroups(); i++) {
966                                 dists[i].resize(m->getNumGroups(), 0.0);
967                         }
968                         
969                         //flip it so you can print it
970                         for (int r=0; r<m->getNumGroups(); r++) { 
971                                 for (int l = 0; l < r; l++) {
972                                         dists[r][l] = utreeScores[count];
973                                         dists[l][r] = utreeScores[count];
974                                         count++;
975                                 }
976                         }
977
978                         //output to file
979                         for (int r=0; r<m->getNumGroups(); r++) { 
980                                 //output name
981                                 string name = (m->getGroups())[r];
982                                 if (name.length() < 10) { //pad with spaces to make compatible
983                                         while (name.length() < 10) {  name += " ";  }
984                                 }
985                                 
986                                 if (outputForm == "lt") {
987                                         out << name << '\t';
988                                         
989                                         //output distances
990                                         for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
991                                         out << endl;
992                                 }else if (outputForm == "square") {
993                                         out << name << '\t';
994                                         
995                                         //output distances
996                                         for (int l = 0; l < m->getNumGroups(); l++) {   out  << dists[r][l] << '\t';  }
997                                         out << endl;
998                                 }else{
999                                         //output distances
1000                                         for (int l = 0; l < r; l++) {   
1001                                                 string otherName = (m->getGroups())[l];
1002                                                 if (otherName.length() < 10) { //pad with spaces to make compatible
1003                                                         while (otherName.length() < 10) {  otherName += " ";  }
1004                                                 }
1005                                                 
1006                                                 out  << name << '\t' << otherName << dists[r][l] << endl;  
1007                                         }
1008                                 }
1009                         }
1010                         out.close();
1011                 }
1012         }
1013         catch(exception& e) {
1014                 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
1015                 exit(1);
1016         }
1017 }
1018 /***********************************************************/
1019 int UnifracWeightedCommand::findIndex(float score, int index) {
1020         try{
1021                 for (int i = 0; i < rScores[index].size(); i++) {
1022                         if (rScores[index][i] >= score) {       return i;       }
1023                 }
1024                 return rScores[index].size();
1025         }
1026         catch(exception& e) {
1027                 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
1028                 exit(1);
1029         }
1030 }
1031
1032 /***********************************************************/
1033
1034 void UnifracWeightedCommand::calculateFreqsCumuls() {
1035         try {
1036                 //clear out old tree values
1037                 rScoreFreq.clear();
1038                 rScoreFreq.resize(numComp);
1039                 rCumul.clear();
1040                 rCumul.resize(numComp);
1041                 validScores.clear();
1042         
1043                 //calculate frequency
1044                 for (int f = 0; f < numComp; f++) {
1045                         for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
1046                                 validScores[rScores[f][i]] = rScores[f][i];
1047                                 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
1048                                 if (it != rScoreFreq[f].end()) {
1049                                         rScoreFreq[f][rScores[f][i]]++;
1050                                 }else{
1051                                         rScoreFreq[f][rScores[f][i]] = 1;
1052                                 }
1053                         }
1054                 }
1055                 
1056                 //calculate rcumul
1057                 for(int a = 0; a < numComp; a++) {
1058                         float rcumul = 1.0000;
1059                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
1060                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
1061                                 //make rscoreFreq map and rCumul
1062                                 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
1063                                 rCumul[a][it->first] = rcumul;
1064                                 //get percentage of random trees with that info
1065                                 if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
1066                                 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
1067                         }
1068                 }
1069
1070         }
1071         catch(exception& e) {
1072                 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
1073                 exit(1);
1074         }
1075 }
1076 /***********************************************************/
1077
1078
1079
1080
1081