]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
94ae125962c85c2cc0b7f100bf5178b89da89bdd
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11 #include "consensus.h"
12 #include "subsample.h"
13 #include "treereader.h"
14
15 //**********************************************************************************************************************
16 vector<string> UnifracWeightedCommand::setParameters(){ 
17         try {
18                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","weighted-wsummary",false,true,true); parameters.push_back(ptree);
19         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
20         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
22                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
23                 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25         CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
26         CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "","tree",false,false); parameters.push_back(pconsensus);
27         CommandParameter prandom("random", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prandom);
28                 CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "","phylip-column",false,false); parameters.push_back(pdistance);
29                 CommandParameter proot("root", "Boolean", "F", "", "", "", "","",false,false); parameters.push_back(proot);
30                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
31                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
32                 
33                 vector<string> myArray;
34                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
35                 return myArray;
36         }
37         catch(exception& e) {
38                 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
39                 exit(1);
40         }
41 }
42 //**********************************************************************************************************************
43 string UnifracWeightedCommand::getHelpString(){ 
44         try {
45                 string helpString = "";
46                 helpString += "The unifrac.weighted command parameters are tree, group, name, count, groups, iters, distance, processors, root, subsample, consensus and random.  tree parameter is required unless you have valid current tree file.\n";
47                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
48                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
49                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
50                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
51                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
52                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
53         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
54         helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results, as well as a consensus tree built from these trees. Default=F.\n";
55         helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
56                 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
57                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
58                 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
59                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
60                 return helpString;
61         }
62         catch(exception& e) {
63                 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
64                 exit(1);
65         }
66 }
67 //**********************************************************************************************************************
68 string UnifracWeightedCommand::getOutputPattern(string type) {
69     try {
70         string pattern = "";
71         if (type == "weighted")            {  pattern = "[filename],weighted-[filename],[tag],weighted";   }
72         else if (type == "wsummary")        {  pattern = "[filename],wsummary";   }
73         else if (type == "phylip")           {  pattern = "[filename],[tag],[tag2],dist";   }
74         else if (type == "column")           {  pattern = "[filename],[tag],[tag2],dist";   }
75         else if (type == "tree")             {  pattern = "[filename],[tag],[tag2],tre";   }
76         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
77         
78         return pattern;
79     }
80     catch(exception& e) {
81         m->errorOut(e, "UnifracWeightedCommand", "getOutputPattern");
82         exit(1);
83     }
84 }
85 //**********************************************************************************************************************
86 UnifracWeightedCommand::UnifracWeightedCommand(){       
87         try {
88                 abort = true; calledHelp = true; 
89                 setParameters();
90                 vector<string> tempOutNames;
91                 outputTypes["weighted"] = tempOutNames;
92                 outputTypes["wsummary"] = tempOutNames;
93                 outputTypes["phylip"] = tempOutNames;
94                 outputTypes["column"] = tempOutNames;
95         outputTypes["tree"] = tempOutNames;
96         }
97         catch(exception& e) {
98                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
99                 exit(1);
100         }
101 }
102
103 /***********************************************************/
104 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
105         try {
106                 abort = false; calledHelp = false;   
107                         
108                 //allow user to run help
109                 if(option == "help") { help(); abort = true; calledHelp = true; }
110                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
111                 
112                 else {
113                         vector<string> myArray = setParameters();
114                         
115                         OptionParser parser(option);
116                         map<string,string> parameters=parser.getParameters();
117                         map<string,string>::iterator it;
118                         
119                         ValidParameters validParameter;
120                 
121                         //check to make sure all parameters are valid for command
122                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
123                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
124                         }
125                         
126                         //initialize outputTypes
127                         vector<string> tempOutNames;
128                         outputTypes["weighted"] = tempOutNames;
129                         outputTypes["wsummary"] = tempOutNames;
130                         outputTypes["phylip"] = tempOutNames;
131                         outputTypes["column"] = tempOutNames;
132             outputTypes["tree"] = tempOutNames;
133                         
134                         //if the user changes the input directory command factory will send this info to us in the output parameter 
135                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
136                         if (inputDir == "not found"){   inputDir = "";          }
137                         else {
138                                 string path;
139                                 it = parameters.find("tree");
140                                 //user has given a template file
141                                 if(it != parameters.end()){ 
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
145                                 }
146                                 
147                                 it = parameters.find("group");
148                                 //user has given a template file
149                                 if(it != parameters.end()){ 
150                                         path = m->hasPath(it->second);
151                                         //if the user has not given a path then, add inputdir. else leave path alone.
152                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
153                                 }
154                                 
155                                 it = parameters.find("name");
156                                 //user has given a template file
157                                 if(it != parameters.end()){ 
158                                         path = m->hasPath(it->second);
159                                         //if the user has not given a path then, add inputdir. else leave path alone.
160                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
161                                 }
162                 
163                 it = parameters.find("count");
164                                 //user has given a template file
165                                 if(it != parameters.end()){ 
166                                         path = m->hasPath(it->second);
167                                         //if the user has not given a path then, add inputdir. else leave path alone.
168                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
169                                 }
170                         }
171                         
172                         //check for required parameters
173                         treefile = validParameter.validFile(parameters, "tree", true);
174                         if (treefile == "not open") { treefile = ""; abort = true; }
175                         else if (treefile == "not found") {                             //if there is a current design file, use it
176                                 treefile = m->getTreeFile(); 
177                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
178                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
179                         }else { m->setTreeFile(treefile); }     
180                         
181                         //check for required parameters
182                         groupfile = validParameter.validFile(parameters, "group", true);
183                         if (groupfile == "not open") { abort = true; }
184                         else if (groupfile == "not found") { groupfile = ""; }
185                         else { m->setGroupFile(groupfile); }
186                         
187                         namefile = validParameter.validFile(parameters, "name", true);
188                         if (namefile == "not open") { namefile = ""; abort = true; }
189                         else if (namefile == "not found") { namefile = ""; }
190                         else { m->setNameFile(namefile); }
191                         
192             countfile = validParameter.validFile(parameters, "count", true);
193                         if (countfile == "not open") { countfile = ""; abort = true; }
194                         else if (countfile == "not found") { countfile = "";  } 
195                         else { m->setCountTableFile(countfile); }
196             
197             if ((namefile != "") && (countfile != "")) {
198                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
199             }
200                         
201             if ((groupfile != "") && (countfile != "")) {
202                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
203             }
204
205                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
206                         
207                                                                                                                                         
208                         //check for optional parameter and set defaults
209                         // ...at some point should added some additional type checking...
210                         groups = validParameter.validFile(parameters, "groups", false);                 
211                         if (groups == "not found") { groups = ""; }
212                         else { 
213                                 m->splitAtDash(groups, Groups);
214                                 m->setGroups(Groups);
215                         }
216                                 
217                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
218                         m->mothurConvert(itersString, iters); 
219                         
220                         string temp = validParameter.validFile(parameters, "distance", false);                  
221                         if (temp == "not found") { phylip = false; outputForm = ""; }
222                         else{
223                 if (temp=="phylip") { temp = "lt"; }
224                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
225                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
226                         }
227                         
228                         temp = validParameter.validFile(parameters, "random", false);                           if (temp == "not found") { temp = "F"; }
229                         random = m->isTrue(temp);
230                         
231                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
232                         includeRoot = m->isTrue(temp);
233                         
234                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
235                         m->setProcessors(temp);
236                         m->mothurConvert(temp, processors);
237             
238             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
239                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
240             else {  
241                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
242                 else { subsample = false; }
243             }
244                         
245             if (!subsample) { subsampleIters = 0;   }
246             else { subsampleIters = iters;          }
247             
248             temp = validParameter.validFile(parameters, "consensus", false);                                    if (temp == "not found") { temp = "F"; }
249                         consensus = m->isTrue(temp);
250             
251                         if (subsample && random) {  m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true;  } 
252                         if (countfile == "") { if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } }
253             else {  
254                 CountTable testCt; 
255                 if ((!testCt.testGroups(countfile)) && (subsample)) {
256                     m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;  
257                 }
258             }
259             if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
260             if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
261             
262                         if (countfile=="") {
263                 if (namefile == "") {
264                     vector<string> files; files.push_back(treefile);
265                     parser.getNameFile(files);
266                 } 
267             }
268                 }
269                 
270                 
271         }
272         catch(exception& e) {
273                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
274                 exit(1);
275         }
276 }
277 /***********************************************************/
278 int UnifracWeightedCommand::execute() {
279         try {
280         
281                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
282                 
283                 m->setTreeFile(treefile);
284                 
285         TreeReader* reader;
286         if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
287         else { reader = new TreeReader(treefile, countfile); }
288         T = reader->getTrees();
289         ct = T[0]->getCountTable();
290         delete reader;
291         
292         if (m->control_pressed) {  delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
293                 
294         map<string, string> variables; 
295                 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
296                 sumFile = getOutputFileName("wsummary",variables);
297                 m->openOutputFile(sumFile, outSum);
298                 outputNames.push_back(sumFile);  outputTypes["wsummary"].push_back(sumFile);
299                 
300         SharedUtil util;
301                 string s; //to make work with setgroups
302                 Groups = m->getGroups();
303                 vector<string> nameGroups = ct->getNamesOfGroups();
304         if (nameGroups.size() < 2) { m->mothurOut("[ERROR]: You cannot run unifrac.weighted with less than 2 groups, aborting.\n"); delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
305                 util.setGroups(Groups, nameGroups, s, numGroups, "weighted");   //sets the groups the user wants to analyze
306                 m->setGroups(Groups);
307                 
308         if (m->control_pressed) {  delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
309         
310                 Weighted weighted(includeRoot);
311                         
312                 int start = time(NULL);
313             
314         //set or check size
315         if (subsample) {
316             //user has not set size, set size = smallest samples size
317             if (subsampleSize == -1) { 
318                 vector<string> temp; temp.push_back(Groups[0]);
319                 subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
320                 for (int i = 1; i < Groups.size(); i++) {
321                     int thisSize = ct->getGroupCount(Groups[i]);
322                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
323                 }
324                 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
325             }else { //eliminate any too small groups
326                 vector<string> newGroups = Groups;
327                 Groups.clear();
328                 for (int i = 0; i < newGroups.size(); i++) {
329                     int thisSize = ct->getGroupCount(newGroups[i]);
330                     
331                     if (thisSize >= subsampleSize) {    Groups.push_back(newGroups[i]); }
332                     else {   m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
333                 } 
334                 m->setGroups(Groups);
335             }
336         }
337         
338         //here in case some groups are removed by subsample
339         util.getCombos(groupComb, Groups, numComp);
340         
341         if (numComp < processors) { processors = numComp; }
342         
343         if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
344         
345         //get weighted scores for users trees
346         for (int i = 0; i < T.size(); i++) {
347             
348             if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]);  } return 0; }
349             
350             counter = 0;
351             rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
352             uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
353             
354             vector<double> userData; userData.resize(numComp,0);  //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
355             vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
356             
357             if (random) {  
358                 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
359                 variables["[tag]"] = toString(i+1);
360                 string wFileName = getOutputFileName("weighted", variables);
361                 output = new ColumnFile(wFileName, itersString);
362                                 outputNames.push_back(wFileName); outputTypes["wweighted"].push_back(wFileName);
363             } 
364             
365             userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
366             if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0; }
367             
368             //save users score
369             for (int s=0; s<numComp; s++) {
370                 //add users score to vector of user scores
371                 uScores[s].push_back(userData[s]);
372                 //save users tree score for summary file
373                 utreeScores.push_back(userData[s]);
374             }
375             
376             if (random) {  runRandomCalcs(T[i], userData); }
377             
378             //clear data
379             rScores.clear();
380             uScores.clear();
381             validScores.clear();
382             
383             //subsample loop
384             vector< vector<double> > calcDistsTotals;  //each iter, each groupCombos dists. this will be used to make .dist files
385             for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
386                 if (m->control_pressed) { break; }
387                 
388                 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
389                 CountTable* newCt = new CountTable();
390                 
391                 int sampleTime = 0;
392                 if (m->debug) { sampleTime = time(NULL); }
393                 
394                 //uses method of setting groups to doNotIncludeMe
395                 SubSample sample;
396                 Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
397                 
398                 if (m->debug) { m->mothurOut("[DEBUG]: iter " + toString(thisIter) + " took " + toString(time(NULL) - sampleTime) + " seconds to sample tree.\n"); }
399                 
400                 //call new weighted function
401                 vector<double> iterData; iterData.resize(numComp,0);
402                 Weighted thisWeighted(includeRoot);
403                 iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
404                 
405                 //save data to make ave dist, std dist
406                 calcDistsTotals.push_back(iterData);
407                 
408                 delete newCt;
409                 delete subSampleTree;
410                 
411                 if((thisIter+1) % 100 == 0){    m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine();              }
412             }
413             
414             if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0; }
415             
416             if (subsample) {  getAverageSTDMatrices(calcDistsTotals, i); }
417             if (consensus) {  getConsensusTrees(calcDistsTotals, i);  }
418         }
419         
420                 
421                 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);  } return 0;  }
422                 
423         if (phylip) {   createPhylipFile();             }
424     
425                 printWSummaryFile();
426                 
427                 //clear out users groups
428                 m->clearGroups();
429                 delete ct; 
430                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
431                 
432                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
433                 
434                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
435                 
436                 //set phylip file as new current phylipfile
437                 string current = "";
438                 itTypes = outputTypes.find("phylip");
439                 if (itTypes != outputTypes.end()) {
440                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
441                 }
442                 
443                 //set column file as new current columnfile
444                 itTypes = outputTypes.find("column");
445                 if (itTypes != outputTypes.end()) {
446                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
447                 }
448                 
449                 m->mothurOutEndLine();
450                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
451                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
452                 m->mothurOutEndLine();
453                 
454                 return 0;
455                 
456         }
457         catch(exception& e) {
458                 m->errorOut(e, "UnifracWeightedCommand", "execute");
459                 exit(1);
460         }
461 }
462 /**************************************************************************************************/
463 int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
464         try {
465         //we need to find the average distance and standard deviation for each groups distance
466         
467         //finds sum
468         vector<double> averages = m->getAverages(dists);        
469         
470         //find standard deviation
471         vector<double> stdDev = m->getStandardDeviation(dists, averages);
472         
473         //make matrix with scores in it
474         vector< vector<double> > avedists;      //avedists.resize(m->getNumGroups());
475         for (int i = 0; i < m->getNumGroups(); i++) {
476             vector<double> temp;
477             for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
478             avedists.push_back(temp);
479         }
480         
481         //make matrix with scores in it
482         vector< vector<double> > stddists;      //stddists.resize(m->getNumGroups());
483         for (int i = 0; i < m->getNumGroups(); i++) {
484             vector<double> temp;
485             for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
486             //stddists[i].resize(m->getNumGroups(), 0.0);
487             stddists.push_back(temp);
488         }
489
490         
491         //flip it so you can print it
492         int count = 0;
493         for (int r=0; r<m->getNumGroups(); r++) { 
494             for (int l = 0; l < r; l++) {
495                 avedists[r][l] = averages[count];
496                 avedists[l][r] = averages[count];
497                 stddists[r][l] = stdDev[count];
498                 stddists[l][r] = stdDev[count];
499                 count++;
500             }
501         }
502         
503         map<string, string> variables; 
504                 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
505         variables["[tag]"] = toString(treeNum+1);
506         variables["[tag2]"] = "weighted.ave";
507         string aveFileName = getOutputFileName("phylip",variables);
508         if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);  }
509         else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName);  }
510         ofstream out;
511         m->openOutputFile(aveFileName, out);
512         
513         variables["[tag2]"] = "weighted.std";
514         string stdFileName = getOutputFileName("phylip",variables);
515         if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
516         else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }        
517         ofstream outStd;
518         m->openOutputFile(stdFileName, outStd);
519         
520         if ((outputForm == "lt") || (outputForm == "square")) {
521             //output numSeqs
522             out << m->getNumGroups() << endl;
523             outStd << m->getNumGroups() << endl;
524         }
525         
526         //output to file
527         for (int r=0; r<m->getNumGroups(); r++) { 
528             //output name
529             string name = (m->getGroups())[r];
530             if (name.length() < 10) { //pad with spaces to make compatible
531                 while (name.length() < 10) {  name += " ";  }
532             }
533             
534             if (outputForm == "lt") {
535                 out << name << '\t';
536                 outStd << name << '\t';
537                 
538                 //output distances
539                 for (int l = 0; l < r; l++) {   out  << avedists[r][l] << '\t';  outStd  << stddists[r][l] << '\t';}
540                 out << endl;  outStd << endl;
541             }else if (outputForm == "square") {
542                 out << name << '\t';
543                 outStd << name << '\t';
544                 
545                 //output distances
546                 for (int l = 0; l < m->getNumGroups(); l++) {   out  << avedists[r][l] << '\t'; outStd  << stddists[r][l] << '\t'; }
547                 out << endl; outStd << endl;
548             }else{
549                 //output distances
550                 for (int l = 0; l < r; l++) {   
551                     string otherName = (m->getGroups())[l];
552                     if (otherName.length() < 10) { //pad with spaces to make compatible
553                         while (otherName.length() < 10) {  otherName += " ";  }
554                     }
555                     
556                     out  << name << '\t' << otherName << avedists[r][l] << endl;  
557                     outStd  << name << '\t' << otherName << stddists[r][l] << endl; 
558                 }
559             }
560         }
561         out.close();
562         outStd.close();
563         
564         return 0;
565     }
566         catch(exception& e) {
567                 m->errorOut(e, "UnifracWeightedCommand", "getAverageSTDMatrices");
568                 exit(1);
569         }
570 }
571
572 /**************************************************************************************************/
573 int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
574         try {
575         
576         //used in tree constructor 
577         m->runParse = false;
578         
579         ///create treemap class from groupmap for tree class to use
580         CountTable newCt;
581         set<string> nameMap;
582         map<string, string> groupMap;
583         set<string> gps;
584         for (int i = 0; i < m->getGroups().size(); i++) { 
585             nameMap.insert(m->getGroups()[i]); 
586             gps.insert(m->getGroups()[i]); 
587             groupMap[m->getGroups()[i]] = m->getGroups()[i];
588         }
589         newCt.createTable(nameMap, groupMap, gps);
590         
591         //clear  old tree names if any
592         m->Treenames.clear();
593         
594         //fills globaldatas tree names
595         m->Treenames = m->getGroups();
596         
597         vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
598         
599         if (m->control_pressed) { return 0; }
600         
601         Consensus con;
602         Tree* conTree = con.getTree(newTrees);
603         
604         //create a new filename
605         map<string, string> variables; 
606                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
607         variables["[tag]"] = toString(treeNum+1);
608         variables["[tag2]"] = "weighted.cons";
609         string conFile = getOutputFileName("tree",variables);                                                   
610         outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
611         ofstream outTree;
612         m->openOutputFile(conFile, outTree);
613         
614         if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
615         outTree.close();
616         
617         return 0;
618
619     }
620         catch(exception& e) {
621                 m->errorOut(e, "UnifracWeightedCommand", "getConsensusTrees");
622                 exit(1);
623         }
624 }
625 /**************************************************************************************************/
626
627 vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
628         try {
629         
630         vector<Tree*> trees;
631         
632         //create a new filename
633         map<string, string> variables; 
634                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
635         variables["[tag]"] = toString(treeNum+1);
636         variables["[tag2]"] = "weighted.all";
637         string outputFile = getOutputFileName("tree",variables);                                
638         outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
639         
640         ofstream outAll;
641         m->openOutputFile(outputFile, outAll);
642         
643
644         for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
645             
646             if (m->control_pressed) { break; }
647             
648             //make matrix with scores in it
649             vector< vector<double> > sims;      sims.resize(m->getNumGroups());
650             for (int j = 0; j < m->getNumGroups(); j++) {
651                 sims[j].resize(m->getNumGroups(), 0.0);
652             }
653             
654             int count = 0;
655                         for (int r=0; r<m->getNumGroups(); r++) { 
656                                 for (int l = 0; l < r; l++) {
657                     double sim = -(dists[i][count]-1.0);
658                                         sims[r][l] = sim;
659                                         sims[l][r] = sim;
660                                         count++;
661                                 }
662                         }
663
664             //create tree
665             Tree* tempTree = new Tree(&myct, sims);
666             tempTree->assembleTree();
667             
668             trees.push_back(tempTree);
669             
670             //print tree
671             tempTree->print(outAll);
672         }
673         
674         outAll.close();
675         
676         if (m->control_pressed) {  for (int i = 0; i < trees.size(); i++) {  delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
677         
678         return trees;
679     }
680         catch(exception& e) {
681                 m->errorOut(e, "UnifracWeightedCommand", "buildTrees");
682                 exit(1);
683         }
684 }
685 /**************************************************************************************************/
686
687 int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
688         try {
689         
690         //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
691         vector< vector<string> > namesOfGroupCombos;
692         for (int a=0; a<numGroups; a++) { 
693             for (int l = 0; l < a; l++) {       
694                 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
695                 namesOfGroupCombos.push_back(groups);
696             }
697         }
698         
699         lines.clear();
700         
701 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
702         if(processors != 1){
703             int numPairs = namesOfGroupCombos.size();
704             int numPairsPerProcessor = numPairs / processors;
705             
706             for (int i = 0; i < processors; i++) {
707                 int startPos = i * numPairsPerProcessor;
708                 if(i == processors - 1){
709                     numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
710                 }
711                 lines.push_back(linePair(startPos, numPairsPerProcessor));
712             }
713         }
714 #endif
715         
716         
717         //get scores for random trees
718         for (int j = 0; j < iters; j++) {
719             cout << j << endl; 
720 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
721             if(processors == 1){
722                 driver(thisTree,  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
723             }else{
724                 createProcesses(thisTree,  namesOfGroupCombos, rScores);
725             }
726 #else
727             driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
728 #endif
729             
730             if (m->control_pressed) { delete ct;  for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]);  } return 0; }
731             
732             //report progress
733             //                                  m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();          
734         }
735         lines.clear();
736         
737         //find the signifigance of the score for summary file
738         for (int f = 0; f < numComp; f++) {
739             //sort random scores
740             sort(rScores[f].begin(), rScores[f].end());
741             
742             //the index of the score higher than yours is returned 
743             //so if you have 1000 random trees the index returned is 100 
744             //then there are 900 trees with a score greater then you. 
745             //giving you a signifigance of 0.900
746             int index = findIndex(usersScores[f], f);    if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
747             
748             //the signifigance is the number of trees with the users score or higher 
749             WScoreSig.push_back((iters-index)/(float)iters);
750         }
751         
752         //out << "Tree# " << i << endl;
753         calculateFreqsCumuls();
754         printWeightedFile();
755         
756         delete output;
757         
758         return 0;
759     }
760         catch(exception& e) {
761                 m->errorOut(e, "UnifracWeightedCommand", "runRandomCalcs");
762                 exit(1);
763         }
764 }
765 /**************************************************************************************************/
766
767 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
768         try {
769 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
770                 int process = 1;
771                 vector<int> processIDS;
772                 
773                 EstOutput results;
774                 
775                 //loop through and create all the processes you want
776                 while (process != processors) {
777                         int pid = fork();
778                         
779                         if (pid > 0) {
780                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
781                                 process++;
782                         }else if (pid == 0){
783                                 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
784                         
785                                 //pass numSeqs to parent
786                                 ofstream out;
787                                 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
788                                 m->openOutputFile(tempFile, out);
789                                 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t';  } out << endl;
790                                 out.close();
791                                 
792                                 exit(0);
793                         }else { 
794                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
795                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
796                                 exit(0);
797                         }
798                 }
799                 
800                 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
801                 
802                 //force parent to wait until all the processes are done
803                 for (int i=0;i<(processors-1);i++) { 
804                         int temp = processIDS[i];
805                         wait(&temp);
806                 }
807                 
808                 //get data created by processes
809                 for (int i=0;i<(processors-1);i++) { 
810         
811                         ifstream in;
812                         string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
813                         m->openInputFile(s, in);
814                         
815                         double tempScore;
816                         for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
817                         in.close();
818                         m->mothurRemove(s);
819                 }
820                 
821                 return 0;
822 #endif          
823         }
824         catch(exception& e) {
825                 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
826                 exit(1);
827         }
828 }
829
830 /**************************************************************************************************/
831 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) { 
832  try {
833                 Tree* randT = new Tree(ct);
834      
835         Weighted weighted(includeRoot);
836      
837                 for (int h = start; h < (start+num); h++) {
838         
839                         if (m->control_pressed) { return 0; }
840                 
841                         //initialize weighted score
842                         string groupA = namesOfGroupCombos[h][0]; 
843                         string groupB = namesOfGroupCombos[h][1];
844                         
845                         //copy T[i]'s info.
846                         randT->getCopy(t);
847                          
848                         //create a random tree with same topology as T[i], but different labels
849                         randT->assembleRandomUnifracTree(groupA, groupB);
850                         
851                         if (m->control_pressed) { delete randT;  return 0;  }
852
853                         //get wscore of random tree
854                         EstOutput randomData = weighted.getValues(randT, groupA, groupB);
855                 
856                         if (m->control_pressed) { delete randT;  return 0;  }
857                                                                                 
858                         //save scores
859                         scores[h].push_back(randomData[0]);
860                 }
861         
862                 delete randT;
863         
864                 return 0;
865
866         }
867         catch(exception& e) {
868                 m->errorOut(e, "UnifracWeightedCommand", "driver");
869                 exit(1);
870         }
871 }
872 /***********************************************************/
873 void UnifracWeightedCommand::printWeightedFile() {
874         try {
875                 vector<double> data;
876                 vector<string> tags;
877                 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
878                 
879                 for(int a = 0; a < numComp; a++) {
880                         output->initFile(groupComb[a], tags);
881                         //print each line
882                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
883                                 data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
884                                 output->output(data);
885                                 data.clear();
886                         } 
887                         output->resetFile();
888                 }
889         }
890         catch(exception& e) {
891                 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
892                 exit(1);
893         }
894 }
895
896
897 /***********************************************************/
898 void UnifracWeightedCommand::printWSummaryFile() {
899         try {
900                 //column headers
901                 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
902                 m->mothurOut("Tree#\tGroups\tWScore\t");
903                 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
904                 outSum << endl; m->mothurOutEndLine();
905                 
906                 //format output
907                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
908                 
909                 //print each line
910                 int count = 0;
911                 for (int i = 0; i < T.size(); i++) { 
912                         for (int j = 0; j < numComp; j++) {
913                                 if (random) {
914                                         if (WScoreSig[count] > (1/(float)iters)) {
915                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
916                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
917                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" +  toString(WScoreSig[count]) + "\n");   
918                                         }else{
919                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
920                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
921                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" +  toString((1/float(iters))) + "\n");  
922                                         }
923                                 }else{
924                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl; 
925                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count]  << endl; 
926                                         m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n"); 
927                                 }
928                                 count++;
929                         }
930                 }
931                 outSum.close();
932         }
933         catch(exception& e) {
934                 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
935                 exit(1);
936         }
937 }
938 /***********************************************************/
939 void UnifracWeightedCommand::createPhylipFile() {
940         try {
941                 int count = 0;
942                 //for each tree
943                 for (int i = 0; i < T.size(); i++) { 
944                 
945             string phylipFileName;
946                         map<string, string> variables; 
947             variables["[filename]"] = outputDir + m->getSimpleName(treefile);
948             variables["[tag]"] = toString(i+1);
949             if ((outputForm == "lt") || (outputForm == "square")) {
950                 variables["[tag2]"] = "weighted.phylip";
951                 phylipFileName = getOutputFileName("phylip",variables);
952                 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
953             }else { //column
954                 variables["[tag2]"] = "weighted.column";
955                 phylipFileName = getOutputFileName("column",variables);
956                 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
957             }
958
959                         
960                         ofstream out;
961                         m->openOutputFile(phylipFileName, out);
962                         
963                         if ((outputForm == "lt") || (outputForm == "square")) {
964                                 //output numSeqs
965                                 out << m->getNumGroups() << endl;
966                         }
967
968                         //make matrix with scores in it
969                         vector< vector<float> > dists;  dists.resize(m->getNumGroups());
970                         for (int i = 0; i < m->getNumGroups(); i++) {
971                                 dists[i].resize(m->getNumGroups(), 0.0);
972                         }
973                         
974                         //flip it so you can print it
975                         for (int r=0; r<m->getNumGroups(); r++) { 
976                                 for (int l = 0; l < r; l++) {
977                                         dists[r][l] = utreeScores[count];
978                                         dists[l][r] = utreeScores[count];
979                                         count++;
980                                 }
981                         }
982
983                         //output to file
984                         for (int r=0; r<m->getNumGroups(); r++) { 
985                                 //output name
986                                 string name = (m->getGroups())[r];
987                                 if (name.length() < 10) { //pad with spaces to make compatible
988                                         while (name.length() < 10) {  name += " ";  }
989                                 }
990                                 
991                                 if (outputForm == "lt") {
992                                         out << name << '\t';
993                                         
994                                         //output distances
995                                         for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
996                                         out << endl;
997                                 }else if (outputForm == "square") {
998                                         out << name << '\t';
999                                         
1000                                         //output distances
1001                                         for (int l = 0; l < m->getNumGroups(); l++) {   out  << dists[r][l] << '\t';  }
1002                                         out << endl;
1003                                 }else{
1004                                         //output distances
1005                                         for (int l = 0; l < r; l++) {   
1006                                                 string otherName = (m->getGroups())[l];
1007                                                 if (otherName.length() < 10) { //pad with spaces to make compatible
1008                                                         while (otherName.length() < 10) {  otherName += " ";  }
1009                                                 }
1010                                                 
1011                                                 out  << name << '\t' << otherName << dists[r][l] << endl;  
1012                                         }
1013                                 }
1014                         }
1015                         out.close();
1016                 }
1017         }
1018         catch(exception& e) {
1019                 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
1020                 exit(1);
1021         }
1022 }
1023 /***********************************************************/
1024 int UnifracWeightedCommand::findIndex(float score, int index) {
1025         try{
1026                 for (int i = 0; i < rScores[index].size(); i++) {
1027                         if (rScores[index][i] >= score) {       return i;       }
1028                 }
1029                 return rScores[index].size();
1030         }
1031         catch(exception& e) {
1032                 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
1033                 exit(1);
1034         }
1035 }
1036
1037 /***********************************************************/
1038
1039 void UnifracWeightedCommand::calculateFreqsCumuls() {
1040         try {
1041                 //clear out old tree values
1042                 rScoreFreq.clear();
1043                 rScoreFreq.resize(numComp);
1044                 rCumul.clear();
1045                 rCumul.resize(numComp);
1046                 validScores.clear();
1047         
1048                 //calculate frequency
1049                 for (int f = 0; f < numComp; f++) {
1050                         for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
1051                                 validScores[rScores[f][i]] = rScores[f][i];
1052                                 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
1053                                 if (it != rScoreFreq[f].end()) {
1054                                         rScoreFreq[f][rScores[f][i]]++;
1055                                 }else{
1056                                         rScoreFreq[f][rScores[f][i]] = 1;
1057                                 }
1058                         }
1059                 }
1060                 
1061                 //calculate rcumul
1062                 for(int a = 0; a < numComp; a++) {
1063                         float rcumul = 1.0000;
1064                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
1065                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
1066                                 //make rscoreFreq map and rCumul
1067                                 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
1068                                 rCumul[a][it->first] = rcumul;
1069                                 //get percentage of random trees with that info
1070                                 if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
1071                                 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
1072                         }
1073                 }
1074
1075         }
1076         catch(exception& e) {
1077                 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
1078                 exit(1);
1079         }
1080 }
1081 /***********************************************************/
1082
1083
1084
1085
1086