]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
Revert to previous commit
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11 #include "consensus.h"
12 #include "subsample.h"
13 #include "treereader.h"
14
15 //**********************************************************************************************************************
16 vector<string> UnifracWeightedCommand::setParameters(){ 
17         try {
18                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
25         CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
26         CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
27                 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
28                 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
29                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
31                 
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string UnifracWeightedCommand::getHelpString(){ 
43         try {
44                 string helpString = "";
45                 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root, subsample, consensus and random.  tree parameter is required unless you have valid current tree file.\n";
46                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
47                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
48                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
49                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
50                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
51                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
53         helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results, as well as a consensus tree built from these trees. Default=F.\n";
54         helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
55                 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
56                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
57                 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
58                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 UnifracWeightedCommand::UnifracWeightedCommand(){       
68         try {
69                 abort = true; calledHelp = true; 
70                 setParameters();
71                 vector<string> tempOutNames;
72                 outputTypes["weighted"] = tempOutNames;
73                 outputTypes["wsummary"] = tempOutNames;
74                 outputTypes["phylip"] = tempOutNames;
75                 outputTypes["column"] = tempOutNames;
76         outputTypes["tree"] = tempOutNames;
77         }
78         catch(exception& e) {
79                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
80                 exit(1);
81         }
82 }
83
84 /***********************************************************/
85 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
86         try {
87                 abort = false; calledHelp = false;   
88                         
89                 //allow user to run help
90                 if(option == "help") { help(); abort = true; calledHelp = true; }
91                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
92                 
93                 else {
94                         vector<string> myArray = setParameters();
95                         
96                         OptionParser parser(option);
97                         map<string,string> parameters=parser.getParameters();
98                         map<string,string>::iterator it;
99                         
100                         ValidParameters validParameter;
101                 
102                         //check to make sure all parameters are valid for command
103                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
104                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
105                         }
106                         
107                         //initialize outputTypes
108                         vector<string> tempOutNames;
109                         outputTypes["weighted"] = tempOutNames;
110                         outputTypes["wsummary"] = tempOutNames;
111                         outputTypes["phylip"] = tempOutNames;
112                         outputTypes["column"] = tempOutNames;
113             outputTypes["tree"] = tempOutNames;
114                         
115                         //if the user changes the input directory command factory will send this info to us in the output parameter 
116                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
117                         if (inputDir == "not found"){   inputDir = "";          }
118                         else {
119                                 string path;
120                                 it = parameters.find("tree");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
126                                 }
127                                 
128                                 it = parameters.find("group");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
134                                 }
135                                 
136                                 it = parameters.find("name");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
142                                 }
143                         }
144                         
145                         //check for required parameters
146                         treefile = validParameter.validFile(parameters, "tree", true);
147                         if (treefile == "not open") { treefile = ""; abort = true; }
148                         else if (treefile == "not found") {                             //if there is a current design file, use it
149                                 treefile = m->getTreeFile(); 
150                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
151                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
152                         }else { m->setTreeFile(treefile); }     
153                         
154                         //check for required parameters
155                         groupfile = validParameter.validFile(parameters, "group", true);
156                         if (groupfile == "not open") { abort = true; }
157                         else if (groupfile == "not found") { groupfile = ""; }
158                         else { m->setGroupFile(groupfile); }
159                         
160                         namefile = validParameter.validFile(parameters, "name", true);
161                         if (namefile == "not open") { namefile = ""; abort = true; }
162                         else if (namefile == "not found") { namefile = ""; }
163                         else { m->setNameFile(namefile); }
164                         
165                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
166                         
167                                                                                                                                         
168                         //check for optional parameter and set defaults
169                         // ...at some point should added some additional type checking...
170                         groups = validParameter.validFile(parameters, "groups", false);                 
171                         if (groups == "not found") { groups = ""; }
172                         else { 
173                                 m->splitAtDash(groups, Groups);
174                                 m->setGroups(Groups);
175                         }
176                                 
177                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
178                         m->mothurConvert(itersString, iters); 
179                         
180                         string temp = validParameter.validFile(parameters, "distance", false);                  
181                         if (temp == "not found") { phylip = false; outputForm = ""; }
182                         else{
183                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
184                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
185                         }
186                         
187                         temp = validParameter.validFile(parameters, "random", false);                           if (temp == "not found") { temp = "F"; }
188                         random = m->isTrue(temp);
189                         
190                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
191                         includeRoot = m->isTrue(temp);
192                         
193                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
194                         m->setProcessors(temp);
195                         m->mothurConvert(temp, processors);
196             
197             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
198                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
199             else {  
200                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
201                 else { subsample = false; }
202             }
203                         
204             if (!subsample) { subsampleIters = 0;   }
205             else { subsampleIters = iters;          }
206             
207             temp = validParameter.validFile(parameters, "consensus", false);                                    if (temp == "not found") { temp = "F"; }
208                         consensus = m->isTrue(temp);
209             
210                         if (subsample && random) {  m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true;  } 
211                         if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } 
212             if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
213             if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
214             
215                         if (namefile == "") {
216                                 vector<string> files; files.push_back(treefile);
217                                 parser.getNameFile(files);
218                         }
219                 }
220                 
221                 
222         }
223         catch(exception& e) {
224                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
225                 exit(1);
226         }
227 }
228 /***********************************************************/
229 int UnifracWeightedCommand::execute() {
230         try {
231         
232                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
233                 
234                 m->setTreeFile(treefile);
235                 
236         TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
237         T = reader->getTrees();
238         tmap = T[0]->getTreeMap();
239         map<string, string> nameMap = reader->getNames();
240         delete reader;
241     
242         if (m->control_pressed) {  delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
243                                 
244                 sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
245                 m->openOutputFile(sumFile, outSum);
246                 outputNames.push_back(sumFile);  outputTypes["wsummary"].push_back(sumFile);
247                 
248         SharedUtil util;
249                 string s; //to make work with setgroups
250                 Groups = m->getGroups();
251                 vector<string> nameGroups = tmap->getNamesOfGroups();
252                 util.setGroups(Groups, nameGroups, s, numGroups, "weighted");   //sets the groups the user wants to analyze
253                 m->setGroups(Groups);
254                 
255         if (m->control_pressed) {  delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
256         
257                 Weighted weighted(includeRoot);
258                         
259                 int start = time(NULL);
260             
261         //set or check size
262         if (subsample) {
263             //user has not set size, set size = smallest samples size
264             if (subsampleSize == -1) { 
265                 vector<string> temp; temp.push_back(Groups[0]);
266                 subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
267                 for (int i = 1; i < Groups.size(); i++) {
268                     temp.clear(); temp.push_back(Groups[i]);
269                     int thisSize = (tmap->getNamesSeqs(temp)).size();
270                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
271                 }
272                 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
273             }else { //eliminate any too small groups
274                 vector<string> newGroups = Groups;
275                 Groups.clear();
276                 for (int i = 0; i < newGroups.size(); i++) {
277                     vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
278                     vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
279                     int thisSize = thisGroupsSeqs.size();
280                     
281                     if (thisSize >= subsampleSize) {    Groups.push_back(newGroups[i]); }
282                     else {  m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
283                 } 
284                 m->setGroups(Groups);
285             }
286         }
287         
288         //here in case some groups are removed by subsample
289         util.getCombos(groupComb, Groups, numComp);
290         
291         if (numComp < processors) { processors = numComp; }
292         
293         if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
294         
295         //get weighted scores for users trees
296         for (int i = 0; i < T.size(); i++) {
297             
298             if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
299             
300             counter = 0;
301             rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
302             uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
303             
304             vector<double> userData; userData.resize(numComp,0);  //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
305             vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
306             
307             if (random) {  
308                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted", itersString);  
309                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
310                 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
311             } 
312             
313             userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
314             if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]);  } return 0; }
315             
316             //save users score
317             for (int s=0; s<numComp; s++) {
318                 //add users score to vector of user scores
319                 uScores[s].push_back(userData[s]);
320                 //save users tree score for summary file
321                 utreeScores.push_back(userData[s]);
322             }
323             
324             if (random) {  runRandomCalcs(T[i], userData); }
325             
326             //clear data
327             rScores.clear();
328             uScores.clear();
329             validScores.clear();
330             
331             //subsample loop
332             vector< vector<double> > calcDistsTotals;  //each iter, each groupCombos dists. this will be used to make .dist files
333             for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
334                 
335                 if (m->control_pressed) { break; }
336                 
337                 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
338                 TreeMap* newTmap = new TreeMap();
339                 newTmap->getCopy(*tmap);
340                 
341                 SubSample sample;
342                 Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
343                    
344                 //call new weighted function
345                 vector<double> iterData; iterData.resize(numComp,0);
346                 Weighted thisWeighted(includeRoot);
347                 iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
348                 
349                 //save data to make ave dist, std dist
350                 calcDistsTotals.push_back(iterData);
351                 
352                 delete newTmap;
353                 delete subSampleTree;
354                 
355                 if((thisIter+1) % 100 == 0){    m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine();              }
356             }
357             
358             if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]);  } return 0; }
359             
360             if (subsample) {  getAverageSTDMatrices(calcDistsTotals, i); }
361             if (consensus) {  getConsensusTrees(calcDistsTotals, i);  }
362         }
363         
364                 
365                 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } return 0;  }
366                 
367         if (phylip) {   createPhylipFile();             }
368     
369                 printWSummaryFile();
370                 
371                 //clear out users groups
372                 m->clearGroups();
373                 delete tmap; 
374                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
375                 
376                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
377                 
378                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
379                 
380                 //set phylip file as new current phylipfile
381                 string current = "";
382                 itTypes = outputTypes.find("phylip");
383                 if (itTypes != outputTypes.end()) {
384                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
385                 }
386                 
387                 //set column file as new current columnfile
388                 itTypes = outputTypes.find("column");
389                 if (itTypes != outputTypes.end()) {
390                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
391                 }
392                 
393                 m->mothurOutEndLine();
394                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
395                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
396                 m->mothurOutEndLine();
397                 
398                 return 0;
399                 
400         }
401         catch(exception& e) {
402                 m->errorOut(e, "UnifracWeightedCommand", "execute");
403                 exit(1);
404         }
405 }
406 /**************************************************************************************************/
407 int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
408         try {
409         //we need to find the average distance and standard deviation for each groups distance
410         
411         //finds sum
412         vector<double> averages; averages.resize(numComp, 0); 
413         for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
414             for (int i = 0; i < dists[thisIter].size(); i++) {  
415                 averages[i] += dists[thisIter][i];
416             }
417         }
418         
419         //finds average.
420         for (int i = 0; i < averages.size(); i++) {  averages[i] /= (float) subsampleIters; }
421         
422         //find standard deviation
423         vector<double> stdDev; stdDev.resize(numComp, 0);
424                 
425         for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
426             for (int j = 0; j < dists[thisIter].size(); j++) {
427                 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
428             }
429         }
430         for (int i = 0; i < stdDev.size(); i++) {  
431             stdDev[i] /= (float) subsampleIters; 
432             stdDev[i] = sqrt(stdDev[i]);
433         }
434         
435         //make matrix with scores in it
436         vector< vector<double> > avedists;      avedists.resize(m->getNumGroups());
437         for (int i = 0; i < m->getNumGroups(); i++) {
438             avedists[i].resize(m->getNumGroups(), 0.0);
439         }
440         
441         //make matrix with scores in it
442         vector< vector<double> > stddists;      stddists.resize(m->getNumGroups());
443         for (int i = 0; i < m->getNumGroups(); i++) {
444             stddists[i].resize(m->getNumGroups(), 0.0);
445         }
446         
447         //flip it so you can print it
448         int count = 0;
449         for (int r=0; r<m->getNumGroups(); r++) { 
450             for (int l = 0; l < r; l++) {
451                 avedists[r][l] = averages[count];
452                 avedists[l][r] = averages[count];
453                 stddists[r][l] = stdDev[count];
454                 stddists[l][r] = stdDev[count];
455                 count++;
456             }
457         }
458         
459         string aveFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".weighted.ave.dist";
460         outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); 
461         
462         ofstream out;
463         m->openOutputFile(aveFileName, out);
464         
465        string stdFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".weighted.std.dist";
466        outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); 
467         
468         ofstream outStd;
469         m->openOutputFile(stdFileName, outStd);
470         
471         if ((outputForm == "lt") || (outputForm == "square")) {
472             //output numSeqs
473             out << m->getNumGroups() << endl;
474             outStd << m->getNumGroups() << endl;
475         }
476         
477         //output to file
478         for (int r=0; r<m->getNumGroups(); r++) { 
479             //output name
480             string name = (m->getGroups())[r];
481             if (name.length() < 10) { //pad with spaces to make compatible
482                 while (name.length() < 10) {  name += " ";  }
483             }
484             
485             if (outputForm == "lt") {
486                 out << name << '\t';
487                 outStd << name << '\t';
488                 
489                 //output distances
490                 for (int l = 0; l < r; l++) {   out  << avedists[r][l] << '\t';  outStd  << stddists[r][l] << '\t';}
491                 out << endl;  outStd << endl;
492             }else if (outputForm == "square") {
493                 out << name << '\t';
494                 outStd << name << '\t';
495                 
496                 //output distances
497                 for (int l = 0; l < m->getNumGroups(); l++) {   out  << avedists[r][l] << '\t'; outStd  << stddists[r][l] << '\t'; }
498                 out << endl; outStd << endl;
499             }else{
500                 //output distances
501                 for (int l = 0; l < r; l++) {   
502                     string otherName = (m->getGroups())[l];
503                     if (otherName.length() < 10) { //pad with spaces to make compatible
504                         while (otherName.length() < 10) {  otherName += " ";  }
505                     }
506                     
507                     out  << name << '\t' << otherName << avedists[r][l] << endl;  
508                     outStd  << name << '\t' << otherName << stddists[r][l] << endl; 
509                 }
510             }
511         }
512         out.close();
513         outStd.close();
514         
515         return 0;
516     }
517         catch(exception& e) {
518                 m->errorOut(e, "UnifracWeightedCommand", "getAverageSTDMatrices");
519                 exit(1);
520         }
521 }
522
523 /**************************************************************************************************/
524 int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
525         try {
526         
527         //used in tree constructor 
528         m->runParse = false;
529         
530         //create treemap class from groupmap for tree class to use
531         TreeMap newTmap;
532         newTmap.makeSim(m->getGroups());
533         
534         //clear  old tree names if any
535         m->Treenames.clear();
536         
537         //fills globaldatas tree names
538         m->Treenames = m->getGroups();
539         
540         vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
541         
542         if (m->control_pressed) { return 0; }
543         
544         Consensus con;
545         Tree* conTree = con.getTree(newTrees);
546         
547         //create a new filename
548         string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons.tre";                           
549         outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
550         ofstream outTree;
551         m->openOutputFile(conFile, outTree);
552         
553         if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
554         outTree.close();
555         
556         return 0;
557
558     }
559         catch(exception& e) {
560                 m->errorOut(e, "UnifracWeightedCommand", "getConsensusTrees");
561                 exit(1);
562         }
563 }
564 /**************************************************************************************************/
565
566 vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
567         try {
568         
569         vector<Tree*> trees;
570         
571         //create a new filename
572         string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.all.tre";                         
573         outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
574         
575         ofstream outAll;
576         m->openOutputFile(outputFile, outAll);
577         
578
579         for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
580             
581             if (m->control_pressed) { break; }
582             
583             //make matrix with scores in it
584             vector< vector<double> > sims;      sims.resize(m->getNumGroups());
585             for (int j = 0; j < m->getNumGroups(); j++) {
586                 sims[j].resize(m->getNumGroups(), 0.0);
587             }
588             
589             int count = 0;
590                         for (int r=0; r<m->getNumGroups(); r++) { 
591                                 for (int l = 0; l < r; l++) {
592                     double sim = -(dists[i][count]-1.0);
593                                         sims[r][l] = sim;
594                                         sims[l][r] = sim;
595                                         count++;
596                                 }
597                         }
598
599             //create tree
600             Tree* tempTree = new Tree(&mytmap, sims);
601             map<string, string> empty;
602             tempTree->assembleTree(empty);
603             
604             trees.push_back(tempTree);
605             
606             //print tree
607             tempTree->print(outAll);
608         }
609         
610         outAll.close();
611         
612         if (m->control_pressed) {  for (int i = 0; i < trees.size(); i++) {  delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
613         
614         return trees;
615     }
616         catch(exception& e) {
617                 m->errorOut(e, "UnifracWeightedCommand", "buildTrees");
618                 exit(1);
619         }
620 }
621 /**************************************************************************************************/
622
623 int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
624         try {
625         
626         //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
627         vector< vector<string> > namesOfGroupCombos;
628         for (int a=0; a<numGroups; a++) { 
629             for (int l = 0; l < a; l++) {       
630                 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
631                 namesOfGroupCombos.push_back(groups);
632             }
633         }
634         
635         lines.clear();
636         
637 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
638         if(processors != 1){
639             int numPairs = namesOfGroupCombos.size();
640             int numPairsPerProcessor = numPairs / processors;
641             
642             for (int i = 0; i < processors; i++) {
643                 int startPos = i * numPairsPerProcessor;
644                 if(i == processors - 1){
645                     numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
646                 }
647                 lines.push_back(linePair(startPos, numPairsPerProcessor));
648             }
649         }
650 #endif
651         
652         
653         //get scores for random trees
654         for (int j = 0; j < iters; j++) {
655             
656 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
657             if(processors == 1){
658                 driver(thisTree,  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
659             }else{
660                 createProcesses(thisTree,  namesOfGroupCombos, rScores);
661             }
662 #else
663             driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
664 #endif
665             
666             if (m->control_pressed) { delete tmap;  for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
667             
668             //report progress
669             //                                  m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();          
670         }
671         lines.clear();
672         
673         //find the signifigance of the score for summary file
674         for (int f = 0; f < numComp; f++) {
675             //sort random scores
676             sort(rScores[f].begin(), rScores[f].end());
677             
678             //the index of the score higher than yours is returned 
679             //so if you have 1000 random trees the index returned is 100 
680             //then there are 900 trees with a score greater then you. 
681             //giving you a signifigance of 0.900
682             int index = findIndex(usersScores[f], f);    if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
683             
684             //the signifigance is the number of trees with the users score or higher 
685             WScoreSig.push_back((iters-index)/(float)iters);
686         }
687         
688         //out << "Tree# " << i << endl;
689         calculateFreqsCumuls();
690         printWeightedFile();
691         
692         delete output;
693         
694         return 0;
695     }
696         catch(exception& e) {
697                 m->errorOut(e, "UnifracWeightedCommand", "runRandomCalcs");
698                 exit(1);
699         }
700 }
701 /**************************************************************************************************/
702
703 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
704         try {
705 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
706                 int process = 1;
707                 vector<int> processIDS;
708                 
709                 EstOutput results;
710                 
711                 //loop through and create all the processes you want
712                 while (process != processors) {
713                         int pid = fork();
714                         
715                         if (pid > 0) {
716                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
717                                 process++;
718                         }else if (pid == 0){
719                                 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
720                         
721                                 //pass numSeqs to parent
722                                 ofstream out;
723                                 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
724                                 m->openOutputFile(tempFile, out);
725                                 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t';  } out << endl;
726                                 out.close();
727                                 
728                                 exit(0);
729                         }else { 
730                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
731                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
732                                 exit(0);
733                         }
734                 }
735                 
736                 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
737                 
738                 //force parent to wait until all the processes are done
739                 for (int i=0;i<(processors-1);i++) { 
740                         int temp = processIDS[i];
741                         wait(&temp);
742                 }
743                 
744                 //get data created by processes
745                 for (int i=0;i<(processors-1);i++) { 
746         
747                         ifstream in;
748                         string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
749                         m->openInputFile(s, in);
750                         
751                         double tempScore;
752                         for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
753                         in.close();
754                         m->mothurRemove(s);
755                 }
756                 
757                 return 0;
758 #endif          
759         }
760         catch(exception& e) {
761                 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
762                 exit(1);
763         }
764 }
765
766 /**************************************************************************************************/
767 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) { 
768  try {
769                 Tree* randT = new Tree(tmap);
770      
771         Weighted weighted(includeRoot);
772      
773                 for (int h = start; h < (start+num); h++) {
774         
775                         if (m->control_pressed) { return 0; }
776                 
777                         //initialize weighted score
778                         string groupA = namesOfGroupCombos[h][0]; 
779                         string groupB = namesOfGroupCombos[h][1];
780                         
781                         //copy T[i]'s info.
782                         randT->getCopy(t);
783                          
784                         //create a random tree with same topology as T[i], but different labels
785                         randT->assembleRandomUnifracTree(groupA, groupB);
786                         
787                         if (m->control_pressed) { delete randT;  return 0;  }
788
789                         //get wscore of random tree
790                         EstOutput randomData = weighted.getValues(randT, groupA, groupB);
791                 
792                         if (m->control_pressed) { delete randT;  return 0;  }
793                                                                                 
794                         //save scores
795                         scores[h].push_back(randomData[0]);
796                 }
797         
798                 delete randT;
799         
800                 return 0;
801
802         }
803         catch(exception& e) {
804                 m->errorOut(e, "UnifracWeightedCommand", "driver");
805                 exit(1);
806         }
807 }
808 /***********************************************************/
809 void UnifracWeightedCommand::printWeightedFile() {
810         try {
811                 vector<double> data;
812                 vector<string> tags;
813                 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
814                 
815                 for(int a = 0; a < numComp; a++) {
816                         output->initFile(groupComb[a], tags);
817                         //print each line
818                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
819                                 data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
820                                 output->output(data);
821                                 data.clear();
822                         } 
823                         output->resetFile();
824                 }
825         }
826         catch(exception& e) {
827                 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
828                 exit(1);
829         }
830 }
831
832
833 /***********************************************************/
834 void UnifracWeightedCommand::printWSummaryFile() {
835         try {
836                 //column headers
837                 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
838                 m->mothurOut("Tree#\tGroups\tWScore\t");
839                 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
840                 outSum << endl; m->mothurOutEndLine();
841                 
842                 //format output
843                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
844                 
845                 //print each line
846                 int count = 0;
847                 for (int i = 0; i < T.size(); i++) { 
848                         for (int j = 0; j < numComp; j++) {
849                                 if (random) {
850                                         if (WScoreSig[count] > (1/(float)iters)) {
851                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
852                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
853                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" +  toString(WScoreSig[count]) + "\n");   
854                                         }else{
855                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
856                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
857                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" +  toString((1/float(iters))) + "\n");  
858                                         }
859                                 }else{
860                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl; 
861                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count]  << endl; 
862                                         m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n"); 
863                                 }
864                                 count++;
865                         }
866                 }
867                 outSum.close();
868         }
869         catch(exception& e) {
870                 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
871                 exit(1);
872         }
873 }
874 /***********************************************************/
875 void UnifracWeightedCommand::createPhylipFile() {
876         try {
877                 int count = 0;
878                 //for each tree
879                 for (int i = 0; i < T.size(); i++) { 
880                 
881                         string phylipFileName;
882                         if ((outputForm == "lt") || (outputForm == "square")) {
883                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.phylip.dist";
884                                 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
885                         }else { //column
886                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.column.dist";
887                                 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
888                         }
889                         
890                         ofstream out;
891                         m->openOutputFile(phylipFileName, out);
892                         
893                         if ((outputForm == "lt") || (outputForm == "square")) {
894                                 //output numSeqs
895                                 out << m->getNumGroups() << endl;
896                         }
897
898                         //make matrix with scores in it
899                         vector< vector<float> > dists;  dists.resize(m->getNumGroups());
900                         for (int i = 0; i < m->getNumGroups(); i++) {
901                                 dists[i].resize(m->getNumGroups(), 0.0);
902                         }
903                         
904                         //flip it so you can print it
905                         for (int r=0; r<m->getNumGroups(); r++) { 
906                                 for (int l = 0; l < r; l++) {
907                                         dists[r][l] = utreeScores[count];
908                                         dists[l][r] = utreeScores[count];
909                                         count++;
910                                 }
911                         }
912
913                         //output to file
914                         for (int r=0; r<m->getNumGroups(); r++) { 
915                                 //output name
916                                 string name = (m->getGroups())[r];
917                                 if (name.length() < 10) { //pad with spaces to make compatible
918                                         while (name.length() < 10) {  name += " ";  }
919                                 }
920                                 
921                                 if (outputForm == "lt") {
922                                         out << name << '\t';
923                                         
924                                         //output distances
925                                         for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
926                                         out << endl;
927                                 }else if (outputForm == "square") {
928                                         out << name << '\t';
929                                         
930                                         //output distances
931                                         for (int l = 0; l < m->getNumGroups(); l++) {   out  << dists[r][l] << '\t';  }
932                                         out << endl;
933                                 }else{
934                                         //output distances
935                                         for (int l = 0; l < r; l++) {   
936                                                 string otherName = (m->getGroups())[l];
937                                                 if (otherName.length() < 10) { //pad with spaces to make compatible
938                                                         while (otherName.length() < 10) {  otherName += " ";  }
939                                                 }
940                                                 
941                                                 out  << name << '\t' << otherName << dists[r][l] << endl;  
942                                         }
943                                 }
944                         }
945                         out.close();
946                 }
947         }
948         catch(exception& e) {
949                 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
950                 exit(1);
951         }
952 }
953 /***********************************************************/
954 int UnifracWeightedCommand::findIndex(float score, int index) {
955         try{
956                 for (int i = 0; i < rScores[index].size(); i++) {
957                         if (rScores[index][i] >= score) {       return i;       }
958                 }
959                 return rScores[index].size();
960         }
961         catch(exception& e) {
962                 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
963                 exit(1);
964         }
965 }
966
967 /***********************************************************/
968
969 void UnifracWeightedCommand::calculateFreqsCumuls() {
970         try {
971                 //clear out old tree values
972                 rScoreFreq.clear();
973                 rScoreFreq.resize(numComp);
974                 rCumul.clear();
975                 rCumul.resize(numComp);
976                 validScores.clear();
977         
978                 //calculate frequency
979                 for (int f = 0; f < numComp; f++) {
980                         for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
981                                 validScores[rScores[f][i]] = rScores[f][i];
982                                 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
983                                 if (it != rScoreFreq[f].end()) {
984                                         rScoreFreq[f][rScores[f][i]]++;
985                                 }else{
986                                         rScoreFreq[f][rScores[f][i]] = 1;
987                                 }
988                         }
989                 }
990                 
991                 //calculate rcumul
992                 for(int a = 0; a < numComp; a++) {
993                         float rcumul = 1.0000;
994                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
995                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
996                                 //make rscoreFreq map and rCumul
997                                 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
998                                 rCumul[a][it->first] = rcumul;
999                                 //get percentage of random trees with that info
1000                                 if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
1001                                 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
1002                         }
1003                 }
1004
1005         }
1006         catch(exception& e) {
1007                 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
1008                 exit(1);
1009         }
1010 }
1011 /***********************************************************/
1012
1013
1014
1015
1016