]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
added load.logfile command. changed summary.single output for subsample=t.
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11 #include "consensus.h"
12 #include "subsample.h"
13 #include "treereader.h"
14
15 //**********************************************************************************************************************
16 vector<string> UnifracWeightedCommand::setParameters(){ 
17         try {
18                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
25         CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
26         CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
27                 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
28                 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
29                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
31                 
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string UnifracWeightedCommand::getHelpString(){ 
43         try {
44                 string helpString = "";
45                 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root, subsample, consensus and random.  tree parameter is required unless you have valid current tree file.\n";
46                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
47                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
48                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
49                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
50                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
51                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
53         helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results, as well as a consensus tree built from these trees. Default=F.\n";
54         helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
55                 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
56                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
57                 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
58                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 string UnifracWeightedCommand::getOutputFileNameTag(string type, string inputName=""){  
68         try {
69         string outputFileName = "";
70                 map<string, vector<string> >::iterator it;
71         
72         //is this a type this command creates
73         it = outputTypes.find(type);
74         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
75         else {
76             if (type == "weighted")            {   outputFileName =  "weighted";   }
77             else if (type == "wsummary")        {   outputFileName =  "wsummary";   }
78             else if (type == "phylip")           {   outputFileName =  "dist";   }
79             else if (type == "column")           {   outputFileName =  "dist";   }
80             else if (type == "tree")             {   outputFileName =  "tre";   }
81             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
82         }
83         return outputFileName;
84         }
85         catch(exception& e) {
86                 m->errorOut(e, "UnifracWeightedCommand", "getOutputFileNameTag");
87                 exit(1);
88         }
89 }
90 //**********************************************************************************************************************
91 UnifracWeightedCommand::UnifracWeightedCommand(){       
92         try {
93                 abort = true; calledHelp = true; 
94                 setParameters();
95                 vector<string> tempOutNames;
96                 outputTypes["weighted"] = tempOutNames;
97                 outputTypes["wsummary"] = tempOutNames;
98                 outputTypes["phylip"] = tempOutNames;
99                 outputTypes["column"] = tempOutNames;
100         outputTypes["tree"] = tempOutNames;
101         }
102         catch(exception& e) {
103                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
104                 exit(1);
105         }
106 }
107
108 /***********************************************************/
109 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
110         try {
111                 abort = false; calledHelp = false;   
112                         
113                 //allow user to run help
114                 if(option == "help") { help(); abort = true; calledHelp = true; }
115                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
116                 
117                 else {
118                         vector<string> myArray = setParameters();
119                         
120                         OptionParser parser(option);
121                         map<string,string> parameters=parser.getParameters();
122                         map<string,string>::iterator it;
123                         
124                         ValidParameters validParameter;
125                 
126                         //check to make sure all parameters are valid for command
127                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
128                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
129                         }
130                         
131                         //initialize outputTypes
132                         vector<string> tempOutNames;
133                         outputTypes["weighted"] = tempOutNames;
134                         outputTypes["wsummary"] = tempOutNames;
135                         outputTypes["phylip"] = tempOutNames;
136                         outputTypes["column"] = tempOutNames;
137             outputTypes["tree"] = tempOutNames;
138                         
139                         //if the user changes the input directory command factory will send this info to us in the output parameter 
140                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
141                         if (inputDir == "not found"){   inputDir = "";          }
142                         else {
143                                 string path;
144                                 it = parameters.find("tree");
145                                 //user has given a template file
146                                 if(it != parameters.end()){ 
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
150                                 }
151                                 
152                                 it = parameters.find("group");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
158                                 }
159                                 
160                                 it = parameters.find("name");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
166                                 }
167                         }
168                         
169                         //check for required parameters
170                         treefile = validParameter.validFile(parameters, "tree", true);
171                         if (treefile == "not open") { treefile = ""; abort = true; }
172                         else if (treefile == "not found") {                             //if there is a current design file, use it
173                                 treefile = m->getTreeFile(); 
174                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
175                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
176                         }else { m->setTreeFile(treefile); }     
177                         
178                         //check for required parameters
179                         groupfile = validParameter.validFile(parameters, "group", true);
180                         if (groupfile == "not open") { abort = true; }
181                         else if (groupfile == "not found") { groupfile = ""; }
182                         else { m->setGroupFile(groupfile); }
183                         
184                         namefile = validParameter.validFile(parameters, "name", true);
185                         if (namefile == "not open") { namefile = ""; abort = true; }
186                         else if (namefile == "not found") { namefile = ""; }
187                         else { m->setNameFile(namefile); }
188                         
189                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
190                         
191                                                                                                                                         
192                         //check for optional parameter and set defaults
193                         // ...at some point should added some additional type checking...
194                         groups = validParameter.validFile(parameters, "groups", false);                 
195                         if (groups == "not found") { groups = ""; }
196                         else { 
197                                 m->splitAtDash(groups, Groups);
198                                 m->setGroups(Groups);
199                         }
200                                 
201                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
202                         m->mothurConvert(itersString, iters); 
203                         
204                         string temp = validParameter.validFile(parameters, "distance", false);                  
205                         if (temp == "not found") { phylip = false; outputForm = ""; }
206                         else{
207                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
208                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
209                         }
210                         
211                         temp = validParameter.validFile(parameters, "random", false);                           if (temp == "not found") { temp = "F"; }
212                         random = m->isTrue(temp);
213                         
214                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
215                         includeRoot = m->isTrue(temp);
216                         
217                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
218                         m->setProcessors(temp);
219                         m->mothurConvert(temp, processors);
220             
221             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
222                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
223             else {  
224                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
225                 else { subsample = false; }
226             }
227                         
228             if (!subsample) { subsampleIters = 0;   }
229             else { subsampleIters = iters;          }
230             
231             temp = validParameter.validFile(parameters, "consensus", false);                                    if (temp == "not found") { temp = "F"; }
232                         consensus = m->isTrue(temp);
233             
234                         if (subsample && random) {  m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true;  } 
235                         if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } 
236             if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
237             if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
238             
239                         if (namefile == "") {
240                                 vector<string> files; files.push_back(treefile);
241                                 parser.getNameFile(files);
242                         }
243                 }
244                 
245                 
246         }
247         catch(exception& e) {
248                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
249                 exit(1);
250         }
251 }
252 /***********************************************************/
253 int UnifracWeightedCommand::execute() {
254         try {
255         
256                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
257                 
258                 m->setTreeFile(treefile);
259                 
260         TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
261         T = reader->getTrees();
262         tmap = T[0]->getTreeMap();
263         map<string, string> nameMap = reader->getNames();
264         map<string, string> unique2Dup = reader->getNameMap();
265         delete reader;
266     
267         if (m->control_pressed) {  delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
268                                 
269                 sumFile = outputDir + m->getSimpleName(treefile) + getOutputFileNameTag("wsummary");
270                 m->openOutputFile(sumFile, outSum);
271                 outputNames.push_back(sumFile);  outputTypes["wsummary"].push_back(sumFile);
272                 
273         SharedUtil util;
274                 string s; //to make work with setgroups
275                 Groups = m->getGroups();
276                 vector<string> nameGroups = tmap->getNamesOfGroups();
277                 util.setGroups(Groups, nameGroups, s, numGroups, "weighted");   //sets the groups the user wants to analyze
278                 m->setGroups(Groups);
279                 
280         if (m->control_pressed) {  delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
281         
282                 Weighted weighted(includeRoot);
283                         
284                 int start = time(NULL);
285             
286         //set or check size
287         if (subsample) {
288             //user has not set size, set size = smallest samples size
289             if (subsampleSize == -1) { 
290                 vector<string> temp; temp.push_back(Groups[0]);
291                 subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
292                 for (int i = 1; i < Groups.size(); i++) {
293                     temp.clear(); temp.push_back(Groups[i]);
294                     int thisSize = (tmap->getNamesSeqs(temp)).size();
295                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
296                 }
297                 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
298             }else { //eliminate any too small groups
299                 vector<string> newGroups = Groups;
300                 Groups.clear();
301                 for (int i = 0; i < newGroups.size(); i++) {
302                     vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
303                     vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
304                     int thisSize = thisGroupsSeqs.size();
305                     
306                     if (thisSize >= subsampleSize) {    Groups.push_back(newGroups[i]); }
307                     else {  m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
308                 } 
309                 m->setGroups(Groups);
310             }
311         }
312         
313         //here in case some groups are removed by subsample
314         util.getCombos(groupComb, Groups, numComp);
315         
316         if (numComp < processors) { processors = numComp; }
317         
318         if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
319         
320         //get weighted scores for users trees
321         for (int i = 0; i < T.size(); i++) {
322             
323             if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
324             
325             counter = 0;
326             rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
327             uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
328             
329             vector<double> userData; userData.resize(numComp,0);  //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
330             vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
331             
332             if (random) {  
333                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("weighted"), itersString);  
334                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("weighted"));
335                 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("weighted"));
336             } 
337             
338             userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
339             if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]);  } return 0; }
340             
341             //save users score
342             for (int s=0; s<numComp; s++) {
343                 //add users score to vector of user scores
344                 uScores[s].push_back(userData[s]);
345                 //save users tree score for summary file
346                 utreeScores.push_back(userData[s]);
347             }
348             
349             if (random) {  runRandomCalcs(T[i], userData); }
350             
351             //clear data
352             rScores.clear();
353             uScores.clear();
354             validScores.clear();
355             
356             //subsample loop
357             vector< vector<double> > calcDistsTotals;  //each iter, each groupCombos dists. this will be used to make .dist files
358             for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
359                 
360                 if (m->control_pressed) { break; }
361                 
362                 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
363                 TreeMap* newTmap = new TreeMap();
364                 //newTmap->getCopy(*tmap);
365                 
366                 //SubSample sample;
367                //Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
368                 
369                 //uses method of setting groups to doNotIncludeMe
370                 SubSample sample;
371                 Tree* subSampleTree = sample.getSample(T[i], tmap, newTmap, subsampleSize, unique2Dup);
372
373                 //call new weighted function
374                 vector<double> iterData; iterData.resize(numComp,0);
375                 Weighted thisWeighted(includeRoot);
376                 iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
377                 
378                 //save data to make ave dist, std dist
379                 calcDistsTotals.push_back(iterData);
380                 
381                 delete newTmap;
382                 delete subSampleTree;
383                 
384                 if((thisIter+1) % 100 == 0){    m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine();              }
385             }
386             
387             if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]);  } return 0; }
388             
389             if (subsample) {  getAverageSTDMatrices(calcDistsTotals, i); }
390             if (consensus) {  getConsensusTrees(calcDistsTotals, i);  }
391         }
392         
393                 
394                 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } return 0;  }
395                 
396         if (phylip) {   createPhylipFile();             }
397     
398                 printWSummaryFile();
399                 
400                 //clear out users groups
401                 m->clearGroups();
402                 delete tmap; 
403                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
404                 
405                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
406                 
407                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
408                 
409                 //set phylip file as new current phylipfile
410                 string current = "";
411                 itTypes = outputTypes.find("phylip");
412                 if (itTypes != outputTypes.end()) {
413                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
414                 }
415                 
416                 //set column file as new current columnfile
417                 itTypes = outputTypes.find("column");
418                 if (itTypes != outputTypes.end()) {
419                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
420                 }
421                 
422                 m->mothurOutEndLine();
423                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
424                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
425                 m->mothurOutEndLine();
426                 
427                 return 0;
428                 
429         }
430         catch(exception& e) {
431                 m->errorOut(e, "UnifracWeightedCommand", "execute");
432                 exit(1);
433         }
434 }
435 /**************************************************************************************************/
436 int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
437         try {
438         //we need to find the average distance and standard deviation for each groups distance
439         
440         //finds sum
441         vector<double> averages; averages.resize(numComp, 0); 
442         for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
443             for (int i = 0; i < dists[thisIter].size(); i++) {  
444                 averages[i] += dists[thisIter][i];
445             }
446         }
447         
448         //finds average.
449         for (int i = 0; i < averages.size(); i++) {  averages[i] /= (float) subsampleIters; }
450         
451         //find standard deviation
452         vector<double> stdDev; stdDev.resize(numComp, 0);
453                 
454         for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
455             for (int j = 0; j < dists[thisIter].size(); j++) {
456                 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
457             }
458         }
459         for (int i = 0; i < stdDev.size(); i++) {  
460             stdDev[i] /= (float) subsampleIters; 
461             stdDev[i] = sqrt(stdDev[i]);
462         }
463         
464         //make matrix with scores in it
465         vector< vector<double> > avedists;      avedists.resize(m->getNumGroups());
466         for (int i = 0; i < m->getNumGroups(); i++) {
467             avedists[i].resize(m->getNumGroups(), 0.0);
468         }
469         
470         //make matrix with scores in it
471         vector< vector<double> > stddists;      stddists.resize(m->getNumGroups());
472         for (int i = 0; i < m->getNumGroups(); i++) {
473             stddists[i].resize(m->getNumGroups(), 0.0);
474         }
475         
476         //flip it so you can print it
477         int count = 0;
478         for (int r=0; r<m->getNumGroups(); r++) { 
479             for (int l = 0; l < r; l++) {
480                 avedists[r][l] = averages[count];
481                 avedists[l][r] = averages[count];
482                 stddists[r][l] = stdDev[count];
483                 stddists[l][r] = stdDev[count];
484                 count++;
485             }
486         }
487         
488         string aveFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".weighted.ave." + getOutputFileNameTag("phylip");
489         if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);  }
490         else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName);  }
491         ofstream out;
492         m->openOutputFile(aveFileName, out);
493         
494         string stdFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".weighted.std." + getOutputFileNameTag("phylip");
495         if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
496         else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }        
497         ofstream outStd;
498         m->openOutputFile(stdFileName, outStd);
499         
500         if ((outputForm == "lt") || (outputForm == "square")) {
501             //output numSeqs
502             out << m->getNumGroups() << endl;
503             outStd << m->getNumGroups() << endl;
504         }
505         
506         //output to file
507         for (int r=0; r<m->getNumGroups(); r++) { 
508             //output name
509             string name = (m->getGroups())[r];
510             if (name.length() < 10) { //pad with spaces to make compatible
511                 while (name.length() < 10) {  name += " ";  }
512             }
513             
514             if (outputForm == "lt") {
515                 out << name << '\t';
516                 outStd << name << '\t';
517                 
518                 //output distances
519                 for (int l = 0; l < r; l++) {   out  << avedists[r][l] << '\t';  outStd  << stddists[r][l] << '\t';}
520                 out << endl;  outStd << endl;
521             }else if (outputForm == "square") {
522                 out << name << '\t';
523                 outStd << name << '\t';
524                 
525                 //output distances
526                 for (int l = 0; l < m->getNumGroups(); l++) {   out  << avedists[r][l] << '\t'; outStd  << stddists[r][l] << '\t'; }
527                 out << endl; outStd << endl;
528             }else{
529                 //output distances
530                 for (int l = 0; l < r; l++) {   
531                     string otherName = (m->getGroups())[l];
532                     if (otherName.length() < 10) { //pad with spaces to make compatible
533                         while (otherName.length() < 10) {  otherName += " ";  }
534                     }
535                     
536                     out  << name << '\t' << otherName << avedists[r][l] << endl;  
537                     outStd  << name << '\t' << otherName << stddists[r][l] << endl; 
538                 }
539             }
540         }
541         out.close();
542         outStd.close();
543         
544         return 0;
545     }
546         catch(exception& e) {
547                 m->errorOut(e, "UnifracWeightedCommand", "getAverageSTDMatrices");
548                 exit(1);
549         }
550 }
551
552 /**************************************************************************************************/
553 int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
554         try {
555         
556         //used in tree constructor 
557         m->runParse = false;
558         
559         //create treemap class from groupmap for tree class to use
560         TreeMap newTmap;
561         newTmap.makeSim(m->getGroups());
562         
563         //clear  old tree names if any
564         m->Treenames.clear();
565         
566         //fills globaldatas tree names
567         m->Treenames = m->getGroups();
568         
569         vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
570         
571         if (m->control_pressed) { return 0; }
572         
573         Consensus con;
574         Tree* conTree = con.getTree(newTrees);
575         
576         //create a new filename
577         string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons." + getOutputFileNameTag("tree");                               
578         outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
579         ofstream outTree;
580         m->openOutputFile(conFile, outTree);
581         
582         if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
583         outTree.close();
584         
585         return 0;
586
587     }
588         catch(exception& e) {
589                 m->errorOut(e, "UnifracWeightedCommand", "getConsensusTrees");
590                 exit(1);
591         }
592 }
593 /**************************************************************************************************/
594
595 vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
596         try {
597         
598         vector<Tree*> trees;
599         
600         //create a new filename
601         string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.all." + getOutputFileNameTag("tree");                             
602         outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
603         
604         ofstream outAll;
605         m->openOutputFile(outputFile, outAll);
606         
607
608         for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
609             
610             if (m->control_pressed) { break; }
611             
612             //make matrix with scores in it
613             vector< vector<double> > sims;      sims.resize(m->getNumGroups());
614             for (int j = 0; j < m->getNumGroups(); j++) {
615                 sims[j].resize(m->getNumGroups(), 0.0);
616             }
617             
618             int count = 0;
619                         for (int r=0; r<m->getNumGroups(); r++) { 
620                                 for (int l = 0; l < r; l++) {
621                     double sim = -(dists[i][count]-1.0);
622                                         sims[r][l] = sim;
623                                         sims[l][r] = sim;
624                                         count++;
625                                 }
626                         }
627
628             //create tree
629             Tree* tempTree = new Tree(&mytmap, sims);
630             map<string, string> empty;
631             tempTree->assembleTree(empty);
632             
633             trees.push_back(tempTree);
634             
635             //print tree
636             tempTree->print(outAll);
637         }
638         
639         outAll.close();
640         
641         if (m->control_pressed) {  for (int i = 0; i < trees.size(); i++) {  delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
642         
643         return trees;
644     }
645         catch(exception& e) {
646                 m->errorOut(e, "UnifracWeightedCommand", "buildTrees");
647                 exit(1);
648         }
649 }
650 /**************************************************************************************************/
651
652 int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
653         try {
654         
655         //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
656         vector< vector<string> > namesOfGroupCombos;
657         for (int a=0; a<numGroups; a++) { 
658             for (int l = 0; l < a; l++) {       
659                 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
660                 namesOfGroupCombos.push_back(groups);
661             }
662         }
663         
664         lines.clear();
665         
666 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
667         if(processors != 1){
668             int numPairs = namesOfGroupCombos.size();
669             int numPairsPerProcessor = numPairs / processors;
670             
671             for (int i = 0; i < processors; i++) {
672                 int startPos = i * numPairsPerProcessor;
673                 if(i == processors - 1){
674                     numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
675                 }
676                 lines.push_back(linePair(startPos, numPairsPerProcessor));
677             }
678         }
679 #endif
680         
681         
682         //get scores for random trees
683         for (int j = 0; j < iters; j++) {
684             
685 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
686             if(processors == 1){
687                 driver(thisTree,  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
688             }else{
689                 createProcesses(thisTree,  namesOfGroupCombos, rScores);
690             }
691 #else
692             driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
693 #endif
694             
695             if (m->control_pressed) { delete tmap;  for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
696             
697             //report progress
698             //                                  m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();          
699         }
700         lines.clear();
701         
702         //find the signifigance of the score for summary file
703         for (int f = 0; f < numComp; f++) {
704             //sort random scores
705             sort(rScores[f].begin(), rScores[f].end());
706             
707             //the index of the score higher than yours is returned 
708             //so if you have 1000 random trees the index returned is 100 
709             //then there are 900 trees with a score greater then you. 
710             //giving you a signifigance of 0.900
711             int index = findIndex(usersScores[f], f);    if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
712             
713             //the signifigance is the number of trees with the users score or higher 
714             WScoreSig.push_back((iters-index)/(float)iters);
715         }
716         
717         //out << "Tree# " << i << endl;
718         calculateFreqsCumuls();
719         printWeightedFile();
720         
721         delete output;
722         
723         return 0;
724     }
725         catch(exception& e) {
726                 m->errorOut(e, "UnifracWeightedCommand", "runRandomCalcs");
727                 exit(1);
728         }
729 }
730 /**************************************************************************************************/
731
732 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
733         try {
734 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
735                 int process = 1;
736                 vector<int> processIDS;
737                 
738                 EstOutput results;
739                 
740                 //loop through and create all the processes you want
741                 while (process != processors) {
742                         int pid = fork();
743                         
744                         if (pid > 0) {
745                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
746                                 process++;
747                         }else if (pid == 0){
748                                 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
749                         
750                                 //pass numSeqs to parent
751                                 ofstream out;
752                                 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
753                                 m->openOutputFile(tempFile, out);
754                                 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t';  } out << endl;
755                                 out.close();
756                                 
757                                 exit(0);
758                         }else { 
759                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
760                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
761                                 exit(0);
762                         }
763                 }
764                 
765                 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
766                 
767                 //force parent to wait until all the processes are done
768                 for (int i=0;i<(processors-1);i++) { 
769                         int temp = processIDS[i];
770                         wait(&temp);
771                 }
772                 
773                 //get data created by processes
774                 for (int i=0;i<(processors-1);i++) { 
775         
776                         ifstream in;
777                         string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
778                         m->openInputFile(s, in);
779                         
780                         double tempScore;
781                         for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
782                         in.close();
783                         m->mothurRemove(s);
784                 }
785                 
786                 return 0;
787 #endif          
788         }
789         catch(exception& e) {
790                 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
791                 exit(1);
792         }
793 }
794
795 /**************************************************************************************************/
796 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) { 
797  try {
798                 Tree* randT = new Tree(tmap);
799      
800         Weighted weighted(includeRoot);
801      
802                 for (int h = start; h < (start+num); h++) {
803         
804                         if (m->control_pressed) { return 0; }
805                 
806                         //initialize weighted score
807                         string groupA = namesOfGroupCombos[h][0]; 
808                         string groupB = namesOfGroupCombos[h][1];
809                         
810                         //copy T[i]'s info.
811                         randT->getCopy(t);
812                          
813                         //create a random tree with same topology as T[i], but different labels
814                         randT->assembleRandomUnifracTree(groupA, groupB);
815                         
816                         if (m->control_pressed) { delete randT;  return 0;  }
817
818                         //get wscore of random tree
819                         EstOutput randomData = weighted.getValues(randT, groupA, groupB);
820                 
821                         if (m->control_pressed) { delete randT;  return 0;  }
822                                                                                 
823                         //save scores
824                         scores[h].push_back(randomData[0]);
825                 }
826         
827                 delete randT;
828         
829                 return 0;
830
831         }
832         catch(exception& e) {
833                 m->errorOut(e, "UnifracWeightedCommand", "driver");
834                 exit(1);
835         }
836 }
837 /***********************************************************/
838 void UnifracWeightedCommand::printWeightedFile() {
839         try {
840                 vector<double> data;
841                 vector<string> tags;
842                 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
843                 
844                 for(int a = 0; a < numComp; a++) {
845                         output->initFile(groupComb[a], tags);
846                         //print each line
847                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
848                                 data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
849                                 output->output(data);
850                                 data.clear();
851                         } 
852                         output->resetFile();
853                 }
854         }
855         catch(exception& e) {
856                 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
857                 exit(1);
858         }
859 }
860
861
862 /***********************************************************/
863 void UnifracWeightedCommand::printWSummaryFile() {
864         try {
865                 //column headers
866                 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
867                 m->mothurOut("Tree#\tGroups\tWScore\t");
868                 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
869                 outSum << endl; m->mothurOutEndLine();
870                 
871                 //format output
872                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
873                 
874                 //print each line
875                 int count = 0;
876                 for (int i = 0; i < T.size(); i++) { 
877                         for (int j = 0; j < numComp; j++) {
878                                 if (random) {
879                                         if (WScoreSig[count] > (1/(float)iters)) {
880                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
881                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
882                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" +  toString(WScoreSig[count]) + "\n");   
883                                         }else{
884                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
885                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
886                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" +  toString((1/float(iters))) + "\n");  
887                                         }
888                                 }else{
889                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl; 
890                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count]  << endl; 
891                                         m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n"); 
892                                 }
893                                 count++;
894                         }
895                 }
896                 outSum.close();
897         }
898         catch(exception& e) {
899                 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
900                 exit(1);
901         }
902 }
903 /***********************************************************/
904 void UnifracWeightedCommand::createPhylipFile() {
905         try {
906                 int count = 0;
907                 //for each tree
908                 for (int i = 0; i < T.size(); i++) { 
909                 
910                         string phylipFileName;
911                         if ((outputForm == "lt") || (outputForm == "square")) {
912                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.phylip." + getOutputFileNameTag("phylip");
913                                 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
914                         }else { //column
915                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.column." + getOutputFileNameTag("column");
916                                 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
917                         }
918                         
919                         ofstream out;
920                         m->openOutputFile(phylipFileName, out);
921                         
922                         if ((outputForm == "lt") || (outputForm == "square")) {
923                                 //output numSeqs
924                                 out << m->getNumGroups() << endl;
925                         }
926
927                         //make matrix with scores in it
928                         vector< vector<float> > dists;  dists.resize(m->getNumGroups());
929                         for (int i = 0; i < m->getNumGroups(); i++) {
930                                 dists[i].resize(m->getNumGroups(), 0.0);
931                         }
932                         
933                         //flip it so you can print it
934                         for (int r=0; r<m->getNumGroups(); r++) { 
935                                 for (int l = 0; l < r; l++) {
936                                         dists[r][l] = utreeScores[count];
937                                         dists[l][r] = utreeScores[count];
938                                         count++;
939                                 }
940                         }
941
942                         //output to file
943                         for (int r=0; r<m->getNumGroups(); r++) { 
944                                 //output name
945                                 string name = (m->getGroups())[r];
946                                 if (name.length() < 10) { //pad with spaces to make compatible
947                                         while (name.length() < 10) {  name += " ";  }
948                                 }
949                                 
950                                 if (outputForm == "lt") {
951                                         out << name << '\t';
952                                         
953                                         //output distances
954                                         for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
955                                         out << endl;
956                                 }else if (outputForm == "square") {
957                                         out << name << '\t';
958                                         
959                                         //output distances
960                                         for (int l = 0; l < m->getNumGroups(); l++) {   out  << dists[r][l] << '\t';  }
961                                         out << endl;
962                                 }else{
963                                         //output distances
964                                         for (int l = 0; l < r; l++) {   
965                                                 string otherName = (m->getGroups())[l];
966                                                 if (otherName.length() < 10) { //pad with spaces to make compatible
967                                                         while (otherName.length() < 10) {  otherName += " ";  }
968                                                 }
969                                                 
970                                                 out  << name << '\t' << otherName << dists[r][l] << endl;  
971                                         }
972                                 }
973                         }
974                         out.close();
975                 }
976         }
977         catch(exception& e) {
978                 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
979                 exit(1);
980         }
981 }
982 /***********************************************************/
983 int UnifracWeightedCommand::findIndex(float score, int index) {
984         try{
985                 for (int i = 0; i < rScores[index].size(); i++) {
986                         if (rScores[index][i] >= score) {       return i;       }
987                 }
988                 return rScores[index].size();
989         }
990         catch(exception& e) {
991                 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
992                 exit(1);
993         }
994 }
995
996 /***********************************************************/
997
998 void UnifracWeightedCommand::calculateFreqsCumuls() {
999         try {
1000                 //clear out old tree values
1001                 rScoreFreq.clear();
1002                 rScoreFreq.resize(numComp);
1003                 rCumul.clear();
1004                 rCumul.resize(numComp);
1005                 validScores.clear();
1006         
1007                 //calculate frequency
1008                 for (int f = 0; f < numComp; f++) {
1009                         for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
1010                                 validScores[rScores[f][i]] = rScores[f][i];
1011                                 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
1012                                 if (it != rScoreFreq[f].end()) {
1013                                         rScoreFreq[f][rScores[f][i]]++;
1014                                 }else{
1015                                         rScoreFreq[f][rScores[f][i]] = 1;
1016                                 }
1017                         }
1018                 }
1019                 
1020                 //calculate rcumul
1021                 for(int a = 0; a < numComp; a++) {
1022                         float rcumul = 1.0000;
1023                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
1024                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
1025                                 //make rscoreFreq map and rCumul
1026                                 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
1027                                 rCumul[a][it->first] = rcumul;
1028                                 //get percentage of random trees with that info
1029                                 if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
1030                                 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
1031                         }
1032                 }
1033
1034         }
1035         catch(exception& e) {
1036                 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
1037                 exit(1);
1038         }
1039 }
1040 /***********************************************************/
1041
1042
1043
1044
1045