]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11 #include "consensus.h"
12 #include "subsample.h"
13 #include "treereader.h"
14
15 //**********************************************************************************************************************
16 vector<string> UnifracWeightedCommand::setParameters(){ 
17         try {
18                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
25         CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
26         CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
27                 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
28                 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
29                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
31                 
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string UnifracWeightedCommand::getHelpString(){ 
43         try {
44                 string helpString = "";
45                 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root, subsample, consensus and random.  tree parameter is required unless you have valid current tree file.\n";
46                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
47                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
48                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
49                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
50                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
51                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
53         helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results, as well as a consensus tree built from these trees. Default=F.\n";
54         helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
55                 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
56                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
57                 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
58                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 UnifracWeightedCommand::UnifracWeightedCommand(){       
68         try {
69                 abort = true; calledHelp = true; 
70                 setParameters();
71                 vector<string> tempOutNames;
72                 outputTypes["weighted"] = tempOutNames;
73                 outputTypes["wsummary"] = tempOutNames;
74                 outputTypes["phylip"] = tempOutNames;
75                 outputTypes["column"] = tempOutNames;
76         outputTypes["tree"] = tempOutNames;
77         }
78         catch(exception& e) {
79                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
80                 exit(1);
81         }
82 }
83
84 /***********************************************************/
85 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
86         try {
87                 abort = false; calledHelp = false;   
88                         
89                 //allow user to run help
90                 if(option == "help") { help(); abort = true; calledHelp = true; }
91                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
92                 
93                 else {
94                         vector<string> myArray = setParameters();
95                         
96                         OptionParser parser(option);
97                         map<string,string> parameters=parser.getParameters();
98                         map<string,string>::iterator it;
99                         
100                         ValidParameters validParameter;
101                 
102                         //check to make sure all parameters are valid for command
103                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
104                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
105                         }
106                         
107                         //initialize outputTypes
108                         vector<string> tempOutNames;
109                         outputTypes["weighted"] = tempOutNames;
110                         outputTypes["wsummary"] = tempOutNames;
111                         outputTypes["phylip"] = tempOutNames;
112                         outputTypes["column"] = tempOutNames;
113             outputTypes["tree"] = tempOutNames;
114                         
115                         //if the user changes the input directory command factory will send this info to us in the output parameter 
116                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
117                         if (inputDir == "not found"){   inputDir = "";          }
118                         else {
119                                 string path;
120                                 it = parameters.find("tree");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
126                                 }
127                                 
128                                 it = parameters.find("group");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
134                                 }
135                                 
136                                 it = parameters.find("name");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
142                                 }
143                         }
144                         
145                         //check for required parameters
146                         treefile = validParameter.validFile(parameters, "tree", true);
147                         if (treefile == "not open") { treefile = ""; abort = true; }
148                         else if (treefile == "not found") {                             //if there is a current design file, use it
149                                 treefile = m->getTreeFile(); 
150                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
151                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
152                         }else { m->setTreeFile(treefile); }     
153                         
154                         //check for required parameters
155                         groupfile = validParameter.validFile(parameters, "group", true);
156                         if (groupfile == "not open") { abort = true; }
157                         else if (groupfile == "not found") { groupfile = ""; }
158                         else { m->setGroupFile(groupfile); }
159                         
160                         namefile = validParameter.validFile(parameters, "name", true);
161                         if (namefile == "not open") { namefile = ""; abort = true; }
162                         else if (namefile == "not found") { namefile = ""; }
163                         else { m->setNameFile(namefile); }
164                         
165                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
166                         
167                                                                                                                                         
168                         //check for optional parameter and set defaults
169                         // ...at some point should added some additional type checking...
170                         groups = validParameter.validFile(parameters, "groups", false);                 
171                         if (groups == "not found") { groups = ""; }
172                         else { 
173                                 m->splitAtDash(groups, Groups);
174                                 m->setGroups(Groups);
175                         }
176                                 
177                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
178                         m->mothurConvert(itersString, iters); 
179                         
180                         string temp = validParameter.validFile(parameters, "distance", false);                  
181                         if (temp == "not found") { phylip = false; outputForm = ""; }
182                         else{
183                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
184                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
185                         }
186                         
187                         temp = validParameter.validFile(parameters, "random", false);                           if (temp == "not found") { temp = "F"; }
188                         random = m->isTrue(temp);
189                         
190                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
191                         includeRoot = m->isTrue(temp);
192                         
193                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
194                         m->setProcessors(temp);
195                         m->mothurConvert(temp, processors);
196             
197             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
198                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
199             else {  
200                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
201                 else { subsample = false; }
202             }
203                         
204             if (!subsample) { subsampleIters = 0;   }
205             else { subsampleIters = iters;          }
206             
207             temp = validParameter.validFile(parameters, "consensus", false);                                    if (temp == "not found") { temp = "F"; }
208                         consensus = m->isTrue(temp);
209             
210                         if (subsample && random) {  m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true;  } 
211                         if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } 
212             if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
213             if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
214             
215                         if (namefile == "") {
216                                 vector<string> files; files.push_back(treefile);
217                                 parser.getNameFile(files);
218                         }
219                 }
220                 
221                 
222         }
223         catch(exception& e) {
224                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
225                 exit(1);
226         }
227 }
228 /***********************************************************/
229 int UnifracWeightedCommand::execute() {
230         try {
231         
232                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
233                 
234                 m->setTreeFile(treefile);
235                 
236         TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
237         T = reader->getTrees();
238         tmap = T[0]->getTreeMap();
239         map<string, string> nameMap = reader->getNames();
240         map<string, string> unique2Dup = reader->getNameMap();
241         delete reader;
242     
243         if (m->control_pressed) {  delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
244                                 
245                 sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
246                 m->openOutputFile(sumFile, outSum);
247                 outputNames.push_back(sumFile);  outputTypes["wsummary"].push_back(sumFile);
248                 
249         SharedUtil util;
250                 string s; //to make work with setgroups
251                 Groups = m->getGroups();
252                 vector<string> nameGroups = tmap->getNamesOfGroups();
253                 util.setGroups(Groups, nameGroups, s, numGroups, "weighted");   //sets the groups the user wants to analyze
254                 m->setGroups(Groups);
255                 
256         if (m->control_pressed) {  delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
257         
258                 Weighted weighted(includeRoot);
259                         
260                 int start = time(NULL);
261             
262         //set or check size
263         if (subsample) {
264             //user has not set size, set size = smallest samples size
265             if (subsampleSize == -1) { 
266                 vector<string> temp; temp.push_back(Groups[0]);
267                 subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
268                 for (int i = 1; i < Groups.size(); i++) {
269                     temp.clear(); temp.push_back(Groups[i]);
270                     int thisSize = (tmap->getNamesSeqs(temp)).size();
271                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
272                 }
273                 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
274             }else { //eliminate any too small groups
275                 vector<string> newGroups = Groups;
276                 Groups.clear();
277                 for (int i = 0; i < newGroups.size(); i++) {
278                     vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
279                     vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
280                     int thisSize = thisGroupsSeqs.size();
281                     
282                     if (thisSize >= subsampleSize) {    Groups.push_back(newGroups[i]); }
283                     else {  m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
284                 } 
285                 m->setGroups(Groups);
286             }
287         }
288         
289         //here in case some groups are removed by subsample
290         util.getCombos(groupComb, Groups, numComp);
291         
292         if (numComp < processors) { processors = numComp; }
293         
294         if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
295         
296         //get weighted scores for users trees
297         for (int i = 0; i < T.size(); i++) {
298             
299             if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
300             
301             counter = 0;
302             rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
303             uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
304             
305             vector<double> userData; userData.resize(numComp,0);  //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
306             vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
307             
308             if (random) {  
309                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted", itersString);  
310                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
311                 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
312             } 
313             
314             userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
315             if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]);  } return 0; }
316             
317             //save users score
318             for (int s=0; s<numComp; s++) {
319                 //add users score to vector of user scores
320                 uScores[s].push_back(userData[s]);
321                 //save users tree score for summary file
322                 utreeScores.push_back(userData[s]);
323             }
324             
325             if (random) {  runRandomCalcs(T[i], userData); }
326             
327             //clear data
328             rScores.clear();
329             uScores.clear();
330             validScores.clear();
331             
332             //subsample loop
333             vector< vector<double> > calcDistsTotals;  //each iter, each groupCombos dists. this will be used to make .dist files
334             for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
335                 
336                 if (m->control_pressed) { break; }
337                 
338                 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
339                 TreeMap* newTmap = new TreeMap();
340                 //newTmap->getCopy(*tmap);
341                 
342                 //SubSample sample;
343                //Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
344                 
345                 //uses method of setting groups to doNotIncludeMe
346                 SubSample sample;
347                 Tree* subSampleTree = sample.getSample(T[i], tmap, newTmap, subsampleSize, unique2Dup);
348
349                 //call new weighted function
350                 vector<double> iterData; iterData.resize(numComp,0);
351                 Weighted thisWeighted(includeRoot);
352                 iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
353                 
354                 //save data to make ave dist, std dist
355                 calcDistsTotals.push_back(iterData);
356                 
357                 delete newTmap;
358                 delete subSampleTree;
359                 
360                 if((thisIter+1) % 100 == 0){    m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine();              }
361             }
362             
363             if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]);  } return 0; }
364             
365             if (subsample) {  getAverageSTDMatrices(calcDistsTotals, i); }
366             if (consensus) {  getConsensusTrees(calcDistsTotals, i);  }
367         }
368         
369                 
370                 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } return 0;  }
371                 
372         if (phylip) {   createPhylipFile();             }
373     
374                 printWSummaryFile();
375                 
376                 //clear out users groups
377                 m->clearGroups();
378                 delete tmap; 
379                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
380                 
381                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
382                 
383                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
384                 
385                 //set phylip file as new current phylipfile
386                 string current = "";
387                 itTypes = outputTypes.find("phylip");
388                 if (itTypes != outputTypes.end()) {
389                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
390                 }
391                 
392                 //set column file as new current columnfile
393                 itTypes = outputTypes.find("column");
394                 if (itTypes != outputTypes.end()) {
395                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
396                 }
397                 
398                 m->mothurOutEndLine();
399                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
400                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
401                 m->mothurOutEndLine();
402                 
403                 return 0;
404                 
405         }
406         catch(exception& e) {
407                 m->errorOut(e, "UnifracWeightedCommand", "execute");
408                 exit(1);
409         }
410 }
411 /**************************************************************************************************/
412 int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
413         try {
414         //we need to find the average distance and standard deviation for each groups distance
415         
416         //finds sum
417         vector<double> averages; averages.resize(numComp, 0); 
418         for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
419             for (int i = 0; i < dists[thisIter].size(); i++) {  
420                 averages[i] += dists[thisIter][i];
421             }
422         }
423         
424         //finds average.
425         for (int i = 0; i < averages.size(); i++) {  averages[i] /= (float) subsampleIters; }
426         
427         //find standard deviation
428         vector<double> stdDev; stdDev.resize(numComp, 0);
429                 
430         for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
431             for (int j = 0; j < dists[thisIter].size(); j++) {
432                 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
433             }
434         }
435         for (int i = 0; i < stdDev.size(); i++) {  
436             stdDev[i] /= (float) subsampleIters; 
437             stdDev[i] = sqrt(stdDev[i]);
438         }
439         
440         //make matrix with scores in it
441         vector< vector<double> > avedists;      avedists.resize(m->getNumGroups());
442         for (int i = 0; i < m->getNumGroups(); i++) {
443             avedists[i].resize(m->getNumGroups(), 0.0);
444         }
445         
446         //make matrix with scores in it
447         vector< vector<double> > stddists;      stddists.resize(m->getNumGroups());
448         for (int i = 0; i < m->getNumGroups(); i++) {
449             stddists[i].resize(m->getNumGroups(), 0.0);
450         }
451         
452         //flip it so you can print it
453         int count = 0;
454         for (int r=0; r<m->getNumGroups(); r++) { 
455             for (int l = 0; l < r; l++) {
456                 avedists[r][l] = averages[count];
457                 avedists[l][r] = averages[count];
458                 stddists[r][l] = stdDev[count];
459                 stddists[l][r] = stdDev[count];
460                 count++;
461             }
462         }
463         
464         string aveFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".weighted.ave.dist";
465         outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); 
466         
467         ofstream out;
468         m->openOutputFile(aveFileName, out);
469         
470        string stdFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".weighted.std.dist";
471        outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); 
472         
473         ofstream outStd;
474         m->openOutputFile(stdFileName, outStd);
475         
476         if ((outputForm == "lt") || (outputForm == "square")) {
477             //output numSeqs
478             out << m->getNumGroups() << endl;
479             outStd << m->getNumGroups() << endl;
480         }
481         
482         //output to file
483         for (int r=0; r<m->getNumGroups(); r++) { 
484             //output name
485             string name = (m->getGroups())[r];
486             if (name.length() < 10) { //pad with spaces to make compatible
487                 while (name.length() < 10) {  name += " ";  }
488             }
489             
490             if (outputForm == "lt") {
491                 out << name << '\t';
492                 outStd << name << '\t';
493                 
494                 //output distances
495                 for (int l = 0; l < r; l++) {   out  << avedists[r][l] << '\t';  outStd  << stddists[r][l] << '\t';}
496                 out << endl;  outStd << endl;
497             }else if (outputForm == "square") {
498                 out << name << '\t';
499                 outStd << name << '\t';
500                 
501                 //output distances
502                 for (int l = 0; l < m->getNumGroups(); l++) {   out  << avedists[r][l] << '\t'; outStd  << stddists[r][l] << '\t'; }
503                 out << endl; outStd << endl;
504             }else{
505                 //output distances
506                 for (int l = 0; l < r; l++) {   
507                     string otherName = (m->getGroups())[l];
508                     if (otherName.length() < 10) { //pad with spaces to make compatible
509                         while (otherName.length() < 10) {  otherName += " ";  }
510                     }
511                     
512                     out  << name << '\t' << otherName << avedists[r][l] << endl;  
513                     outStd  << name << '\t' << otherName << stddists[r][l] << endl; 
514                 }
515             }
516         }
517         out.close();
518         outStd.close();
519         
520         return 0;
521     }
522         catch(exception& e) {
523                 m->errorOut(e, "UnifracWeightedCommand", "getAverageSTDMatrices");
524                 exit(1);
525         }
526 }
527
528 /**************************************************************************************************/
529 int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
530         try {
531         
532         //used in tree constructor 
533         m->runParse = false;
534         
535         //create treemap class from groupmap for tree class to use
536         TreeMap newTmap;
537         newTmap.makeSim(m->getGroups());
538         
539         //clear  old tree names if any
540         m->Treenames.clear();
541         
542         //fills globaldatas tree names
543         m->Treenames = m->getGroups();
544         
545         vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
546         
547         if (m->control_pressed) { return 0; }
548         
549         Consensus con;
550         Tree* conTree = con.getTree(newTrees);
551         
552         //create a new filename
553         string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons.tre";                           
554         outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
555         ofstream outTree;
556         m->openOutputFile(conFile, outTree);
557         
558         if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
559         outTree.close();
560         
561         return 0;
562
563     }
564         catch(exception& e) {
565                 m->errorOut(e, "UnifracWeightedCommand", "getConsensusTrees");
566                 exit(1);
567         }
568 }
569 /**************************************************************************************************/
570
571 vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
572         try {
573         
574         vector<Tree*> trees;
575         
576         //create a new filename
577         string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.all.tre";                         
578         outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
579         
580         ofstream outAll;
581         m->openOutputFile(outputFile, outAll);
582         
583
584         for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
585             
586             if (m->control_pressed) { break; }
587             
588             //make matrix with scores in it
589             vector< vector<double> > sims;      sims.resize(m->getNumGroups());
590             for (int j = 0; j < m->getNumGroups(); j++) {
591                 sims[j].resize(m->getNumGroups(), 0.0);
592             }
593             
594             int count = 0;
595                         for (int r=0; r<m->getNumGroups(); r++) { 
596                                 for (int l = 0; l < r; l++) {
597                     double sim = -(dists[i][count]-1.0);
598                                         sims[r][l] = sim;
599                                         sims[l][r] = sim;
600                                         count++;
601                                 }
602                         }
603
604             //create tree
605             Tree* tempTree = new Tree(&mytmap, sims);
606             map<string, string> empty;
607             tempTree->assembleTree(empty);
608             
609             trees.push_back(tempTree);
610             
611             //print tree
612             tempTree->print(outAll);
613         }
614         
615         outAll.close();
616         
617         if (m->control_pressed) {  for (int i = 0; i < trees.size(); i++) {  delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
618         
619         return trees;
620     }
621         catch(exception& e) {
622                 m->errorOut(e, "UnifracWeightedCommand", "buildTrees");
623                 exit(1);
624         }
625 }
626 /**************************************************************************************************/
627
628 int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
629         try {
630         
631         //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
632         vector< vector<string> > namesOfGroupCombos;
633         for (int a=0; a<numGroups; a++) { 
634             for (int l = 0; l < a; l++) {       
635                 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
636                 namesOfGroupCombos.push_back(groups);
637             }
638         }
639         
640         lines.clear();
641         
642 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
643         if(processors != 1){
644             int numPairs = namesOfGroupCombos.size();
645             int numPairsPerProcessor = numPairs / processors;
646             
647             for (int i = 0; i < processors; i++) {
648                 int startPos = i * numPairsPerProcessor;
649                 if(i == processors - 1){
650                     numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
651                 }
652                 lines.push_back(linePair(startPos, numPairsPerProcessor));
653             }
654         }
655 #endif
656         
657         
658         //get scores for random trees
659         for (int j = 0; j < iters; j++) {
660             
661 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
662             if(processors == 1){
663                 driver(thisTree,  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
664             }else{
665                 createProcesses(thisTree,  namesOfGroupCombos, rScores);
666             }
667 #else
668             driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
669 #endif
670             
671             if (m->control_pressed) { delete tmap;  for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
672             
673             //report progress
674             //                                  m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();          
675         }
676         lines.clear();
677         
678         //find the signifigance of the score for summary file
679         for (int f = 0; f < numComp; f++) {
680             //sort random scores
681             sort(rScores[f].begin(), rScores[f].end());
682             
683             //the index of the score higher than yours is returned 
684             //so if you have 1000 random trees the index returned is 100 
685             //then there are 900 trees with a score greater then you. 
686             //giving you a signifigance of 0.900
687             int index = findIndex(usersScores[f], f);    if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
688             
689             //the signifigance is the number of trees with the users score or higher 
690             WScoreSig.push_back((iters-index)/(float)iters);
691         }
692         
693         //out << "Tree# " << i << endl;
694         calculateFreqsCumuls();
695         printWeightedFile();
696         
697         delete output;
698         
699         return 0;
700     }
701         catch(exception& e) {
702                 m->errorOut(e, "UnifracWeightedCommand", "runRandomCalcs");
703                 exit(1);
704         }
705 }
706 /**************************************************************************************************/
707
708 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
709         try {
710 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
711                 int process = 1;
712                 vector<int> processIDS;
713                 
714                 EstOutput results;
715                 
716                 //loop through and create all the processes you want
717                 while (process != processors) {
718                         int pid = fork();
719                         
720                         if (pid > 0) {
721                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
722                                 process++;
723                         }else if (pid == 0){
724                                 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
725                         
726                                 //pass numSeqs to parent
727                                 ofstream out;
728                                 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
729                                 m->openOutputFile(tempFile, out);
730                                 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t';  } out << endl;
731                                 out.close();
732                                 
733                                 exit(0);
734                         }else { 
735                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
736                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
737                                 exit(0);
738                         }
739                 }
740                 
741                 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
742                 
743                 //force parent to wait until all the processes are done
744                 for (int i=0;i<(processors-1);i++) { 
745                         int temp = processIDS[i];
746                         wait(&temp);
747                 }
748                 
749                 //get data created by processes
750                 for (int i=0;i<(processors-1);i++) { 
751         
752                         ifstream in;
753                         string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
754                         m->openInputFile(s, in);
755                         
756                         double tempScore;
757                         for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
758                         in.close();
759                         m->mothurRemove(s);
760                 }
761                 
762                 return 0;
763 #endif          
764         }
765         catch(exception& e) {
766                 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
767                 exit(1);
768         }
769 }
770
771 /**************************************************************************************************/
772 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) { 
773  try {
774                 Tree* randT = new Tree(tmap);
775      
776         Weighted weighted(includeRoot);
777      
778                 for (int h = start; h < (start+num); h++) {
779         
780                         if (m->control_pressed) { return 0; }
781                 
782                         //initialize weighted score
783                         string groupA = namesOfGroupCombos[h][0]; 
784                         string groupB = namesOfGroupCombos[h][1];
785                         
786                         //copy T[i]'s info.
787                         randT->getCopy(t);
788                          
789                         //create a random tree with same topology as T[i], but different labels
790                         randT->assembleRandomUnifracTree(groupA, groupB);
791                         
792                         if (m->control_pressed) { delete randT;  return 0;  }
793
794                         //get wscore of random tree
795                         EstOutput randomData = weighted.getValues(randT, groupA, groupB);
796                 
797                         if (m->control_pressed) { delete randT;  return 0;  }
798                                                                                 
799                         //save scores
800                         scores[h].push_back(randomData[0]);
801                 }
802         
803                 delete randT;
804         
805                 return 0;
806
807         }
808         catch(exception& e) {
809                 m->errorOut(e, "UnifracWeightedCommand", "driver");
810                 exit(1);
811         }
812 }
813 /***********************************************************/
814 void UnifracWeightedCommand::printWeightedFile() {
815         try {
816                 vector<double> data;
817                 vector<string> tags;
818                 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
819                 
820                 for(int a = 0; a < numComp; a++) {
821                         output->initFile(groupComb[a], tags);
822                         //print each line
823                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
824                                 data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
825                                 output->output(data);
826                                 data.clear();
827                         } 
828                         output->resetFile();
829                 }
830         }
831         catch(exception& e) {
832                 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
833                 exit(1);
834         }
835 }
836
837
838 /***********************************************************/
839 void UnifracWeightedCommand::printWSummaryFile() {
840         try {
841                 //column headers
842                 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
843                 m->mothurOut("Tree#\tGroups\tWScore\t");
844                 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
845                 outSum << endl; m->mothurOutEndLine();
846                 
847                 //format output
848                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
849                 
850                 //print each line
851                 int count = 0;
852                 for (int i = 0; i < T.size(); i++) { 
853                         for (int j = 0; j < numComp; j++) {
854                                 if (random) {
855                                         if (WScoreSig[count] > (1/(float)iters)) {
856                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
857                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
858                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" +  toString(WScoreSig[count]) + "\n");   
859                                         }else{
860                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
861                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
862                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" +  toString((1/float(iters))) + "\n");  
863                                         }
864                                 }else{
865                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl; 
866                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count]  << endl; 
867                                         m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n"); 
868                                 }
869                                 count++;
870                         }
871                 }
872                 outSum.close();
873         }
874         catch(exception& e) {
875                 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
876                 exit(1);
877         }
878 }
879 /***********************************************************/
880 void UnifracWeightedCommand::createPhylipFile() {
881         try {
882                 int count = 0;
883                 //for each tree
884                 for (int i = 0; i < T.size(); i++) { 
885                 
886                         string phylipFileName;
887                         if ((outputForm == "lt") || (outputForm == "square")) {
888                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.phylip.dist";
889                                 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
890                         }else { //column
891                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.column.dist";
892                                 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
893                         }
894                         
895                         ofstream out;
896                         m->openOutputFile(phylipFileName, out);
897                         
898                         if ((outputForm == "lt") || (outputForm == "square")) {
899                                 //output numSeqs
900                                 out << m->getNumGroups() << endl;
901                         }
902
903                         //make matrix with scores in it
904                         vector< vector<float> > dists;  dists.resize(m->getNumGroups());
905                         for (int i = 0; i < m->getNumGroups(); i++) {
906                                 dists[i].resize(m->getNumGroups(), 0.0);
907                         }
908                         
909                         //flip it so you can print it
910                         for (int r=0; r<m->getNumGroups(); r++) { 
911                                 for (int l = 0; l < r; l++) {
912                                         dists[r][l] = utreeScores[count];
913                                         dists[l][r] = utreeScores[count];
914                                         count++;
915                                 }
916                         }
917
918                         //output to file
919                         for (int r=0; r<m->getNumGroups(); r++) { 
920                                 //output name
921                                 string name = (m->getGroups())[r];
922                                 if (name.length() < 10) { //pad with spaces to make compatible
923                                         while (name.length() < 10) {  name += " ";  }
924                                 }
925                                 
926                                 if (outputForm == "lt") {
927                                         out << name << '\t';
928                                         
929                                         //output distances
930                                         for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
931                                         out << endl;
932                                 }else if (outputForm == "square") {
933                                         out << name << '\t';
934                                         
935                                         //output distances
936                                         for (int l = 0; l < m->getNumGroups(); l++) {   out  << dists[r][l] << '\t';  }
937                                         out << endl;
938                                 }else{
939                                         //output distances
940                                         for (int l = 0; l < r; l++) {   
941                                                 string otherName = (m->getGroups())[l];
942                                                 if (otherName.length() < 10) { //pad with spaces to make compatible
943                                                         while (otherName.length() < 10) {  otherName += " ";  }
944                                                 }
945                                                 
946                                                 out  << name << '\t' << otherName << dists[r][l] << endl;  
947                                         }
948                                 }
949                         }
950                         out.close();
951                 }
952         }
953         catch(exception& e) {
954                 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
955                 exit(1);
956         }
957 }
958 /***********************************************************/
959 int UnifracWeightedCommand::findIndex(float score, int index) {
960         try{
961                 for (int i = 0; i < rScores[index].size(); i++) {
962                         if (rScores[index][i] >= score) {       return i;       }
963                 }
964                 return rScores[index].size();
965         }
966         catch(exception& e) {
967                 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
968                 exit(1);
969         }
970 }
971
972 /***********************************************************/
973
974 void UnifracWeightedCommand::calculateFreqsCumuls() {
975         try {
976                 //clear out old tree values
977                 rScoreFreq.clear();
978                 rScoreFreq.resize(numComp);
979                 rCumul.clear();
980                 rCumul.resize(numComp);
981                 validScores.clear();
982         
983                 //calculate frequency
984                 for (int f = 0; f < numComp; f++) {
985                         for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
986                                 validScores[rScores[f][i]] = rScores[f][i];
987                                 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
988                                 if (it != rScoreFreq[f].end()) {
989                                         rScoreFreq[f][rScores[f][i]]++;
990                                 }else{
991                                         rScoreFreq[f][rScores[f][i]] = 1;
992                                 }
993                         }
994                 }
995                 
996                 //calculate rcumul
997                 for(int a = 0; a < numComp; a++) {
998                         float rcumul = 1.0000;
999                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
1000                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
1001                                 //make rscoreFreq map and rCumul
1002                                 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
1003                                 rCumul[a][it->first] = rcumul;
1004                                 //get percentage of random trees with that info
1005                                 if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
1006                                 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
1007                         }
1008                 }
1009
1010         }
1011         catch(exception& e) {
1012                 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
1013                 exit(1);
1014         }
1015 }
1016 /***********************************************************/
1017
1018
1019
1020
1021