]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
added sets to amova and homova commands. added oligos to make.contigs. added metadat...
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11 #include "consensus.h"
12 #include "subsample.h"
13 #include "treereader.h"
14
15 //**********************************************************************************************************************
16 vector<string> UnifracWeightedCommand::setParameters(){ 
17         try {
18                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
20         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
22                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
23                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
26         CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
27         CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
28                 CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(pdistance);
29                 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
30                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
31                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
32                 
33                 vector<string> myArray;
34                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
35                 return myArray;
36         }
37         catch(exception& e) {
38                 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
39                 exit(1);
40         }
41 }
42 //**********************************************************************************************************************
43 string UnifracWeightedCommand::getHelpString(){ 
44         try {
45                 string helpString = "";
46                 helpString += "The unifrac.weighted command parameters are tree, group, name, count, groups, iters, distance, processors, root, subsample, consensus and random.  tree parameter is required unless you have valid current tree file.\n";
47                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
48                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
49                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
50                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
51                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
52                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
53         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
54         helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results, as well as a consensus tree built from these trees. Default=F.\n";
55         helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
56                 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
57                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
58                 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
59                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
60                 return helpString;
61         }
62         catch(exception& e) {
63                 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
64                 exit(1);
65         }
66 }
67 //**********************************************************************************************************************
68 string UnifracWeightedCommand::getOutputFileNameTag(string type, string inputName=""){  
69         try {
70         string outputFileName = "";
71                 map<string, vector<string> >::iterator it;
72         
73         //is this a type this command creates
74         it = outputTypes.find(type);
75         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
76         else {
77             if (type == "weighted")            {   outputFileName =  "weighted";   }
78             else if (type == "wsummary")        {   outputFileName =  "wsummary";   }
79             else if (type == "phylip")           {   outputFileName =  "dist";   }
80             else if (type == "column")           {   outputFileName =  "dist";   }
81             else if (type == "tree")             {   outputFileName =  "tre";   }
82             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
83         }
84         return outputFileName;
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "UnifracWeightedCommand", "getOutputFileNameTag");
88                 exit(1);
89         }
90 }
91 //**********************************************************************************************************************
92 UnifracWeightedCommand::UnifracWeightedCommand(){       
93         try {
94                 abort = true; calledHelp = true; 
95                 setParameters();
96                 vector<string> tempOutNames;
97                 outputTypes["weighted"] = tempOutNames;
98                 outputTypes["wsummary"] = tempOutNames;
99                 outputTypes["phylip"] = tempOutNames;
100                 outputTypes["column"] = tempOutNames;
101         outputTypes["tree"] = tempOutNames;
102         }
103         catch(exception& e) {
104                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
105                 exit(1);
106         }
107 }
108
109 /***********************************************************/
110 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
111         try {
112                 abort = false; calledHelp = false;   
113                         
114                 //allow user to run help
115                 if(option == "help") { help(); abort = true; calledHelp = true; }
116                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
117                 
118                 else {
119                         vector<string> myArray = setParameters();
120                         
121                         OptionParser parser(option);
122                         map<string,string> parameters=parser.getParameters();
123                         map<string,string>::iterator it;
124                         
125                         ValidParameters validParameter;
126                 
127                         //check to make sure all parameters are valid for command
128                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
129                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
130                         }
131                         
132                         //initialize outputTypes
133                         vector<string> tempOutNames;
134                         outputTypes["weighted"] = tempOutNames;
135                         outputTypes["wsummary"] = tempOutNames;
136                         outputTypes["phylip"] = tempOutNames;
137                         outputTypes["column"] = tempOutNames;
138             outputTypes["tree"] = tempOutNames;
139                         
140                         //if the user changes the input directory command factory will send this info to us in the output parameter 
141                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
142                         if (inputDir == "not found"){   inputDir = "";          }
143                         else {
144                                 string path;
145                                 it = parameters.find("tree");
146                                 //user has given a template file
147                                 if(it != parameters.end()){ 
148                                         path = m->hasPath(it->second);
149                                         //if the user has not given a path then, add inputdir. else leave path alone.
150                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
151                                 }
152                                 
153                                 it = parameters.find("group");
154                                 //user has given a template file
155                                 if(it != parameters.end()){ 
156                                         path = m->hasPath(it->second);
157                                         //if the user has not given a path then, add inputdir. else leave path alone.
158                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
159                                 }
160                                 
161                                 it = parameters.find("name");
162                                 //user has given a template file
163                                 if(it != parameters.end()){ 
164                                         path = m->hasPath(it->second);
165                                         //if the user has not given a path then, add inputdir. else leave path alone.
166                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
167                                 }
168                 
169                 it = parameters.find("count");
170                                 //user has given a template file
171                                 if(it != parameters.end()){ 
172                                         path = m->hasPath(it->second);
173                                         //if the user has not given a path then, add inputdir. else leave path alone.
174                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
175                                 }
176                         }
177                         
178                         //check for required parameters
179                         treefile = validParameter.validFile(parameters, "tree", true);
180                         if (treefile == "not open") { treefile = ""; abort = true; }
181                         else if (treefile == "not found") {                             //if there is a current design file, use it
182                                 treefile = m->getTreeFile(); 
183                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
184                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
185                         }else { m->setTreeFile(treefile); }     
186                         
187                         //check for required parameters
188                         groupfile = validParameter.validFile(parameters, "group", true);
189                         if (groupfile == "not open") { abort = true; }
190                         else if (groupfile == "not found") { groupfile = ""; }
191                         else { m->setGroupFile(groupfile); }
192                         
193                         namefile = validParameter.validFile(parameters, "name", true);
194                         if (namefile == "not open") { namefile = ""; abort = true; }
195                         else if (namefile == "not found") { namefile = ""; }
196                         else { m->setNameFile(namefile); }
197                         
198             countfile = validParameter.validFile(parameters, "count", true);
199                         if (countfile == "not open") { countfile = ""; abort = true; }
200                         else if (countfile == "not found") { countfile = "";  } 
201                         else { m->setCountTableFile(countfile); }
202             
203             if ((namefile != "") && (countfile != "")) {
204                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
205             }
206                         
207             if ((groupfile != "") && (countfile != "")) {
208                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
209             }
210
211                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
212                         
213                                                                                                                                         
214                         //check for optional parameter and set defaults
215                         // ...at some point should added some additional type checking...
216                         groups = validParameter.validFile(parameters, "groups", false);                 
217                         if (groups == "not found") { groups = ""; }
218                         else { 
219                                 m->splitAtDash(groups, Groups);
220                                 m->setGroups(Groups);
221                         }
222                                 
223                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
224                         m->mothurConvert(itersString, iters); 
225                         
226                         string temp = validParameter.validFile(parameters, "distance", false);                  
227                         if (temp == "not found") { phylip = false; outputForm = ""; }
228                         else{
229                 if (temp=="phylip") { temp = "lt"; }
230                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
231                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
232                         }
233                         
234                         temp = validParameter.validFile(parameters, "random", false);                           if (temp == "not found") { temp = "F"; }
235                         random = m->isTrue(temp);
236                         
237                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
238                         includeRoot = m->isTrue(temp);
239                         
240                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
241                         m->setProcessors(temp);
242                         m->mothurConvert(temp, processors);
243             
244             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
245                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
246             else {  
247                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
248                 else { subsample = false; }
249             }
250                         
251             if (!subsample) { subsampleIters = 0;   }
252             else { subsampleIters = iters;          }
253             
254             temp = validParameter.validFile(parameters, "consensus", false);                                    if (temp == "not found") { temp = "F"; }
255                         consensus = m->isTrue(temp);
256             
257                         if (subsample && random) {  m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true;  } 
258                         if (countfile == "") { if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } }
259             else {  
260                 CountTable testCt; 
261                 if ((!testCt.testGroups(countfile)) && (subsample)) {
262                     m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;  
263                 }
264             }
265             if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
266             if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
267             
268                         if (countfile=="") {
269                 if (namefile == "") {
270                     vector<string> files; files.push_back(treefile);
271                     parser.getNameFile(files);
272                 } 
273             }
274                 }
275                 
276                 
277         }
278         catch(exception& e) {
279                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
280                 exit(1);
281         }
282 }
283 /***********************************************************/
284 int UnifracWeightedCommand::execute() {
285         try {
286         
287                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
288                 
289                 m->setTreeFile(treefile);
290                 
291         TreeReader* reader;
292         if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
293         else { reader = new TreeReader(treefile, countfile); }
294         T = reader->getTrees();
295         ct = T[0]->getCountTable();
296         delete reader;
297         
298         if (m->control_pressed) {  delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
299                                 
300                 sumFile = outputDir + m->getSimpleName(treefile) + getOutputFileNameTag("wsummary");
301                 m->openOutputFile(sumFile, outSum);
302                 outputNames.push_back(sumFile);  outputTypes["wsummary"].push_back(sumFile);
303                 
304         SharedUtil util;
305                 string s; //to make work with setgroups
306                 Groups = m->getGroups();
307                 vector<string> nameGroups = ct->getNamesOfGroups();
308                 util.setGroups(Groups, nameGroups, s, numGroups, "weighted");   //sets the groups the user wants to analyze
309                 m->setGroups(Groups);
310                 
311         if (m->control_pressed) {  delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
312         
313                 Weighted weighted(includeRoot);
314                         
315                 int start = time(NULL);
316             
317         //set or check size
318         if (subsample) {
319             //user has not set size, set size = smallest samples size
320             if (subsampleSize == -1) { 
321                 vector<string> temp; temp.push_back(Groups[0]);
322                 subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
323                 for (int i = 1; i < Groups.size(); i++) {
324                     int thisSize = ct->getGroupCount(Groups[i]);
325                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
326                 }
327                 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
328             }else { //eliminate any too small groups
329                 vector<string> newGroups = Groups;
330                 Groups.clear();
331                 for (int i = 0; i < newGroups.size(); i++) {
332                     int thisSize = ct->getGroupCount(newGroups[i]);
333                     
334                     if (thisSize >= subsampleSize) {    Groups.push_back(newGroups[i]); }
335                     else {   m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
336                 } 
337                 m->setGroups(Groups);
338             }
339         }
340         
341         //here in case some groups are removed by subsample
342         util.getCombos(groupComb, Groups, numComp);
343         
344         if (numComp < processors) { processors = numComp; }
345         
346         if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
347         
348         //get weighted scores for users trees
349         for (int i = 0; i < T.size(); i++) {
350             
351             if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]);  } return 0; }
352             
353             counter = 0;
354             rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
355             uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
356             
357             vector<double> userData; userData.resize(numComp,0);  //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
358             vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
359             
360             if (random) {  
361                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("weighted"), itersString);  
362                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("weighted"));
363                 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("weighted"));
364             } 
365             
366             userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
367             if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0; }
368             
369             //save users score
370             for (int s=0; s<numComp; s++) {
371                 //add users score to vector of user scores
372                 uScores[s].push_back(userData[s]);
373                 //save users tree score for summary file
374                 utreeScores.push_back(userData[s]);
375             }
376             
377             if (random) {  runRandomCalcs(T[i], userData); }
378             
379             //clear data
380             rScores.clear();
381             uScores.clear();
382             validScores.clear();
383             
384             //subsample loop
385             vector< vector<double> > calcDistsTotals;  //each iter, each groupCombos dists. this will be used to make .dist files
386             for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
387                 if (m->control_pressed) { break; }
388                 
389                 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
390                 CountTable* newCt = new CountTable();
391                 
392                 int sampleTime = 0;
393                 if (m->debug) { sampleTime = time(NULL); }
394                 
395                 //uses method of setting groups to doNotIncludeMe
396                 SubSample sample;
397                 Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
398                 
399                 if (m->debug) { m->mothurOut("[DEBUG]: iter " + toString(thisIter) + " took " + toString(time(NULL) - sampleTime) + " seconds to sample tree.\n"); }
400                 
401                 //call new weighted function
402                 vector<double> iterData; iterData.resize(numComp,0);
403                 Weighted thisWeighted(includeRoot);
404                 iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
405                 
406                 //save data to make ave dist, std dist
407                 calcDistsTotals.push_back(iterData);
408                 
409                 delete newCt;
410                 delete subSampleTree;
411                 
412                 if((thisIter+1) % 100 == 0){    m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine();              }
413             }
414             
415             if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0; }
416             
417             if (subsample) {  getAverageSTDMatrices(calcDistsTotals, i); }
418             if (consensus) {  getConsensusTrees(calcDistsTotals, i);  }
419         }
420         
421                 
422                 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);  } return 0;  }
423                 
424         if (phylip) {   createPhylipFile();             }
425     
426                 printWSummaryFile();
427                 
428                 //clear out users groups
429                 m->clearGroups();
430                 delete ct; 
431                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
432                 
433                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
434                 
435                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
436                 
437                 //set phylip file as new current phylipfile
438                 string current = "";
439                 itTypes = outputTypes.find("phylip");
440                 if (itTypes != outputTypes.end()) {
441                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
442                 }
443                 
444                 //set column file as new current columnfile
445                 itTypes = outputTypes.find("column");
446                 if (itTypes != outputTypes.end()) {
447                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
448                 }
449                 
450                 m->mothurOutEndLine();
451                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
452                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
453                 m->mothurOutEndLine();
454                 
455                 return 0;
456                 
457         }
458         catch(exception& e) {
459                 m->errorOut(e, "UnifracWeightedCommand", "execute");
460                 exit(1);
461         }
462 }
463 /**************************************************************************************************/
464 int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
465         try {
466         //we need to find the average distance and standard deviation for each groups distance
467         
468         //finds sum
469         vector<double> averages; averages.resize(numComp, 0); 
470         for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
471             for (int i = 0; i < dists[thisIter].size(); i++) {  
472                 averages[i] += dists[thisIter][i];
473             }
474         }
475         
476         //finds average.
477         for (int i = 0; i < averages.size(); i++) {  averages[i] /= (float) subsampleIters; }
478         
479         //find standard deviation
480         vector<double> stdDev; stdDev.resize(numComp, 0);
481                 
482         for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
483             for (int j = 0; j < dists[thisIter].size(); j++) {
484                 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
485             }
486         }
487         for (int i = 0; i < stdDev.size(); i++) {  
488             stdDev[i] /= (float) subsampleIters; 
489             stdDev[i] = sqrt(stdDev[i]);
490         }
491         
492         //make matrix with scores in it
493         vector< vector<double> > avedists;      avedists.resize(m->getNumGroups());
494         for (int i = 0; i < m->getNumGroups(); i++) {
495             avedists[i].resize(m->getNumGroups(), 0.0);
496         }
497         
498         //make matrix with scores in it
499         vector< vector<double> > stddists;      stddists.resize(m->getNumGroups());
500         for (int i = 0; i < m->getNumGroups(); i++) {
501             stddists[i].resize(m->getNumGroups(), 0.0);
502         }
503         
504         //flip it so you can print it
505         int count = 0;
506         for (int r=0; r<m->getNumGroups(); r++) { 
507             for (int l = 0; l < r; l++) {
508                 avedists[r][l] = averages[count];
509                 avedists[l][r] = averages[count];
510                 stddists[r][l] = stdDev[count];
511                 stddists[l][r] = stdDev[count];
512                 count++;
513             }
514         }
515         
516         string aveFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".weighted.ave." + getOutputFileNameTag("phylip");
517         if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);  }
518         else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName);  }
519         ofstream out;
520         m->openOutputFile(aveFileName, out);
521         
522         string stdFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".weighted.std." + getOutputFileNameTag("phylip");
523         if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
524         else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }        
525         ofstream outStd;
526         m->openOutputFile(stdFileName, outStd);
527         
528         if ((outputForm == "lt") || (outputForm == "square")) {
529             //output numSeqs
530             out << m->getNumGroups() << endl;
531             outStd << m->getNumGroups() << endl;
532         }
533         
534         //output to file
535         for (int r=0; r<m->getNumGroups(); r++) { 
536             //output name
537             string name = (m->getGroups())[r];
538             if (name.length() < 10) { //pad with spaces to make compatible
539                 while (name.length() < 10) {  name += " ";  }
540             }
541             
542             if (outputForm == "lt") {
543                 out << name << '\t';
544                 outStd << name << '\t';
545                 
546                 //output distances
547                 for (int l = 0; l < r; l++) {   out  << avedists[r][l] << '\t';  outStd  << stddists[r][l] << '\t';}
548                 out << endl;  outStd << endl;
549             }else if (outputForm == "square") {
550                 out << name << '\t';
551                 outStd << name << '\t';
552                 
553                 //output distances
554                 for (int l = 0; l < m->getNumGroups(); l++) {   out  << avedists[r][l] << '\t'; outStd  << stddists[r][l] << '\t'; }
555                 out << endl; outStd << endl;
556             }else{
557                 //output distances
558                 for (int l = 0; l < r; l++) {   
559                     string otherName = (m->getGroups())[l];
560                     if (otherName.length() < 10) { //pad with spaces to make compatible
561                         while (otherName.length() < 10) {  otherName += " ";  }
562                     }
563                     
564                     out  << name << '\t' << otherName << avedists[r][l] << endl;  
565                     outStd  << name << '\t' << otherName << stddists[r][l] << endl; 
566                 }
567             }
568         }
569         out.close();
570         outStd.close();
571         
572         return 0;
573     }
574         catch(exception& e) {
575                 m->errorOut(e, "UnifracWeightedCommand", "getAverageSTDMatrices");
576                 exit(1);
577         }
578 }
579
580 /**************************************************************************************************/
581 int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
582         try {
583         
584         //used in tree constructor 
585         m->runParse = false;
586         
587         ///create treemap class from groupmap for tree class to use
588         CountTable newCt;
589         set<string> nameMap;
590         map<string, string> groupMap;
591         set<string> gps;
592         for (int i = 0; i < m->getGroups().size(); i++) { 
593             nameMap.insert(m->getGroups()[i]); 
594             gps.insert(m->getGroups()[i]); 
595             groupMap[m->getGroups()[i]] = m->getGroups()[i];
596         }
597         newCt.createTable(nameMap, groupMap, gps);
598         
599         //clear  old tree names if any
600         m->Treenames.clear();
601         
602         //fills globaldatas tree names
603         m->Treenames = m->getGroups();
604         
605         vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
606         
607         if (m->control_pressed) { return 0; }
608         
609         Consensus con;
610         Tree* conTree = con.getTree(newTrees);
611         
612         //create a new filename
613         string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons." + getOutputFileNameTag("tree");                               
614         outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
615         ofstream outTree;
616         m->openOutputFile(conFile, outTree);
617         
618         if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
619         outTree.close();
620         
621         return 0;
622
623     }
624         catch(exception& e) {
625                 m->errorOut(e, "UnifracWeightedCommand", "getConsensusTrees");
626                 exit(1);
627         }
628 }
629 /**************************************************************************************************/
630
631 vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
632         try {
633         
634         vector<Tree*> trees;
635         
636         //create a new filename
637         string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.all." + getOutputFileNameTag("tree");                             
638         outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
639         
640         ofstream outAll;
641         m->openOutputFile(outputFile, outAll);
642         
643
644         for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
645             
646             if (m->control_pressed) { break; }
647             
648             //make matrix with scores in it
649             vector< vector<double> > sims;      sims.resize(m->getNumGroups());
650             for (int j = 0; j < m->getNumGroups(); j++) {
651                 sims[j].resize(m->getNumGroups(), 0.0);
652             }
653             
654             int count = 0;
655                         for (int r=0; r<m->getNumGroups(); r++) { 
656                                 for (int l = 0; l < r; l++) {
657                     double sim = -(dists[i][count]-1.0);
658                                         sims[r][l] = sim;
659                                         sims[l][r] = sim;
660                                         count++;
661                                 }
662                         }
663
664             //create tree
665             Tree* tempTree = new Tree(&myct, sims);
666             tempTree->assembleTree();
667             
668             trees.push_back(tempTree);
669             
670             //print tree
671             tempTree->print(outAll);
672         }
673         
674         outAll.close();
675         
676         if (m->control_pressed) {  for (int i = 0; i < trees.size(); i++) {  delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
677         
678         return trees;
679     }
680         catch(exception& e) {
681                 m->errorOut(e, "UnifracWeightedCommand", "buildTrees");
682                 exit(1);
683         }
684 }
685 /**************************************************************************************************/
686
687 int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
688         try {
689         
690         //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
691         vector< vector<string> > namesOfGroupCombos;
692         for (int a=0; a<numGroups; a++) { 
693             for (int l = 0; l < a; l++) {       
694                 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
695                 namesOfGroupCombos.push_back(groups);
696             }
697         }
698         
699         lines.clear();
700         
701 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
702         if(processors != 1){
703             int numPairs = namesOfGroupCombos.size();
704             int numPairsPerProcessor = numPairs / processors;
705             
706             for (int i = 0; i < processors; i++) {
707                 int startPos = i * numPairsPerProcessor;
708                 if(i == processors - 1){
709                     numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
710                 }
711                 lines.push_back(linePair(startPos, numPairsPerProcessor));
712             }
713         }
714 #endif
715         
716         
717         //get scores for random trees
718         for (int j = 0; j < iters; j++) {
719             cout << j << endl; 
720 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
721             if(processors == 1){
722                 driver(thisTree,  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
723             }else{
724                 createProcesses(thisTree,  namesOfGroupCombos, rScores);
725             }
726 #else
727             driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
728 #endif
729             
730             if (m->control_pressed) { delete ct;  for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]);  } return 0; }
731             
732             //report progress
733             //                                  m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();          
734         }
735         lines.clear();
736         
737         //find the signifigance of the score for summary file
738         for (int f = 0; f < numComp; f++) {
739             //sort random scores
740             sort(rScores[f].begin(), rScores[f].end());
741             
742             //the index of the score higher than yours is returned 
743             //so if you have 1000 random trees the index returned is 100 
744             //then there are 900 trees with a score greater then you. 
745             //giving you a signifigance of 0.900
746             int index = findIndex(usersScores[f], f);    if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
747             
748             //the signifigance is the number of trees with the users score or higher 
749             WScoreSig.push_back((iters-index)/(float)iters);
750         }
751         
752         //out << "Tree# " << i << endl;
753         calculateFreqsCumuls();
754         printWeightedFile();
755         
756         delete output;
757         
758         return 0;
759     }
760         catch(exception& e) {
761                 m->errorOut(e, "UnifracWeightedCommand", "runRandomCalcs");
762                 exit(1);
763         }
764 }
765 /**************************************************************************************************/
766
767 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
768         try {
769 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
770                 int process = 1;
771                 vector<int> processIDS;
772                 
773                 EstOutput results;
774                 
775                 //loop through and create all the processes you want
776                 while (process != processors) {
777                         int pid = fork();
778                         
779                         if (pid > 0) {
780                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
781                                 process++;
782                         }else if (pid == 0){
783                                 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
784                         
785                                 //pass numSeqs to parent
786                                 ofstream out;
787                                 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
788                                 m->openOutputFile(tempFile, out);
789                                 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t';  } out << endl;
790                                 out.close();
791                                 
792                                 exit(0);
793                         }else { 
794                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
795                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
796                                 exit(0);
797                         }
798                 }
799                 
800                 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
801                 
802                 //force parent to wait until all the processes are done
803                 for (int i=0;i<(processors-1);i++) { 
804                         int temp = processIDS[i];
805                         wait(&temp);
806                 }
807                 
808                 //get data created by processes
809                 for (int i=0;i<(processors-1);i++) { 
810         
811                         ifstream in;
812                         string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
813                         m->openInputFile(s, in);
814                         
815                         double tempScore;
816                         for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
817                         in.close();
818                         m->mothurRemove(s);
819                 }
820                 
821                 return 0;
822 #endif          
823         }
824         catch(exception& e) {
825                 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
826                 exit(1);
827         }
828 }
829
830 /**************************************************************************************************/
831 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) { 
832  try {
833                 Tree* randT = new Tree(ct);
834      
835         Weighted weighted(includeRoot);
836      
837                 for (int h = start; h < (start+num); h++) {
838         
839                         if (m->control_pressed) { return 0; }
840                 
841                         //initialize weighted score
842                         string groupA = namesOfGroupCombos[h][0]; 
843                         string groupB = namesOfGroupCombos[h][1];
844                         
845                         //copy T[i]'s info.
846                         randT->getCopy(t);
847                          
848                         //create a random tree with same topology as T[i], but different labels
849                         randT->assembleRandomUnifracTree(groupA, groupB);
850                         
851                         if (m->control_pressed) { delete randT;  return 0;  }
852
853                         //get wscore of random tree
854                         EstOutput randomData = weighted.getValues(randT, groupA, groupB);
855                 
856                         if (m->control_pressed) { delete randT;  return 0;  }
857                                                                                 
858                         //save scores
859                         scores[h].push_back(randomData[0]);
860                 }
861         
862                 delete randT;
863         
864                 return 0;
865
866         }
867         catch(exception& e) {
868                 m->errorOut(e, "UnifracWeightedCommand", "driver");
869                 exit(1);
870         }
871 }
872 /***********************************************************/
873 void UnifracWeightedCommand::printWeightedFile() {
874         try {
875                 vector<double> data;
876                 vector<string> tags;
877                 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
878                 
879                 for(int a = 0; a < numComp; a++) {
880                         output->initFile(groupComb[a], tags);
881                         //print each line
882                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
883                                 data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
884                                 output->output(data);
885                                 data.clear();
886                         } 
887                         output->resetFile();
888                 }
889         }
890         catch(exception& e) {
891                 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
892                 exit(1);
893         }
894 }
895
896
897 /***********************************************************/
898 void UnifracWeightedCommand::printWSummaryFile() {
899         try {
900                 //column headers
901                 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
902                 m->mothurOut("Tree#\tGroups\tWScore\t");
903                 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
904                 outSum << endl; m->mothurOutEndLine();
905                 
906                 //format output
907                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
908                 
909                 //print each line
910                 int count = 0;
911                 for (int i = 0; i < T.size(); i++) { 
912                         for (int j = 0; j < numComp; j++) {
913                                 if (random) {
914                                         if (WScoreSig[count] > (1/(float)iters)) {
915                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
916                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
917                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" +  toString(WScoreSig[count]) + "\n");   
918                                         }else{
919                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
920                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
921                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" +  toString((1/float(iters))) + "\n");  
922                                         }
923                                 }else{
924                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl; 
925                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count]  << endl; 
926                                         m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n"); 
927                                 }
928                                 count++;
929                         }
930                 }
931                 outSum.close();
932         }
933         catch(exception& e) {
934                 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
935                 exit(1);
936         }
937 }
938 /***********************************************************/
939 void UnifracWeightedCommand::createPhylipFile() {
940         try {
941                 int count = 0;
942                 //for each tree
943                 for (int i = 0; i < T.size(); i++) { 
944                 
945                         string phylipFileName;
946                         if ((outputForm == "lt") || (outputForm == "square")) {
947                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.phylip." + getOutputFileNameTag("phylip");
948                                 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
949                         }else { //column
950                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.column." + getOutputFileNameTag("column");
951                                 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
952                         }
953                         
954                         ofstream out;
955                         m->openOutputFile(phylipFileName, out);
956                         
957                         if ((outputForm == "lt") || (outputForm == "square")) {
958                                 //output numSeqs
959                                 out << m->getNumGroups() << endl;
960                         }
961
962                         //make matrix with scores in it
963                         vector< vector<float> > dists;  dists.resize(m->getNumGroups());
964                         for (int i = 0; i < m->getNumGroups(); i++) {
965                                 dists[i].resize(m->getNumGroups(), 0.0);
966                         }
967                         
968                         //flip it so you can print it
969                         for (int r=0; r<m->getNumGroups(); r++) { 
970                                 for (int l = 0; l < r; l++) {
971                                         dists[r][l] = utreeScores[count];
972                                         dists[l][r] = utreeScores[count];
973                                         count++;
974                                 }
975                         }
976
977                         //output to file
978                         for (int r=0; r<m->getNumGroups(); r++) { 
979                                 //output name
980                                 string name = (m->getGroups())[r];
981                                 if (name.length() < 10) { //pad with spaces to make compatible
982                                         while (name.length() < 10) {  name += " ";  }
983                                 }
984                                 
985                                 if (outputForm == "lt") {
986                                         out << name << '\t';
987                                         
988                                         //output distances
989                                         for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
990                                         out << endl;
991                                 }else if (outputForm == "square") {
992                                         out << name << '\t';
993                                         
994                                         //output distances
995                                         for (int l = 0; l < m->getNumGroups(); l++) {   out  << dists[r][l] << '\t';  }
996                                         out << endl;
997                                 }else{
998                                         //output distances
999                                         for (int l = 0; l < r; l++) {   
1000                                                 string otherName = (m->getGroups())[l];
1001                                                 if (otherName.length() < 10) { //pad with spaces to make compatible
1002                                                         while (otherName.length() < 10) {  otherName += " ";  }
1003                                                 }
1004                                                 
1005                                                 out  << name << '\t' << otherName << dists[r][l] << endl;  
1006                                         }
1007                                 }
1008                         }
1009                         out.close();
1010                 }
1011         }
1012         catch(exception& e) {
1013                 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
1014                 exit(1);
1015         }
1016 }
1017 /***********************************************************/
1018 int UnifracWeightedCommand::findIndex(float score, int index) {
1019         try{
1020                 for (int i = 0; i < rScores[index].size(); i++) {
1021                         if (rScores[index][i] >= score) {       return i;       }
1022                 }
1023                 return rScores[index].size();
1024         }
1025         catch(exception& e) {
1026                 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
1027                 exit(1);
1028         }
1029 }
1030
1031 /***********************************************************/
1032
1033 void UnifracWeightedCommand::calculateFreqsCumuls() {
1034         try {
1035                 //clear out old tree values
1036                 rScoreFreq.clear();
1037                 rScoreFreq.resize(numComp);
1038                 rCumul.clear();
1039                 rCumul.resize(numComp);
1040                 validScores.clear();
1041         
1042                 //calculate frequency
1043                 for (int f = 0; f < numComp; f++) {
1044                         for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
1045                                 validScores[rScores[f][i]] = rScores[f][i];
1046                                 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
1047                                 if (it != rScoreFreq[f].end()) {
1048                                         rScoreFreq[f][rScores[f][i]]++;
1049                                 }else{
1050                                         rScoreFreq[f][rScores[f][i]] = 1;
1051                                 }
1052                         }
1053                 }
1054                 
1055                 //calculate rcumul
1056                 for(int a = 0; a < numComp; a++) {
1057                         float rcumul = 1.0000;
1058                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
1059                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
1060                                 //make rscoreFreq map and rCumul
1061                                 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
1062                                 rCumul[a][it->first] = rcumul;
1063                                 //get percentage of random trees with that info
1064                                 if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
1065                                 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
1066                         }
1067                 }
1068
1069         }
1070         catch(exception& e) {
1071                 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
1072                 exit(1);
1073         }
1074 }
1075 /***********************************************************/
1076
1077
1078
1079
1080