]> git.donarmstrong.com Git - mothur.git/blob - unifracunweightedcommand.cpp
major change to the tree class to use the count table class instead of tree map....
[mothur.git] / unifracunweightedcommand.cpp
1 /*
2  *  unifracunweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracunweightedcommand.h"
11 #include "treereader.h"
12 #include "subsample.h"
13 #include "consensus.h"
14
15 //**********************************************************************************************************************
16 vector<string> UnifracUnweightedCommand::setParameters(){       
17         try {
18                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
20         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
22                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
23                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25                 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
26                 CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(pdistance);
27         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
28         CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
29         CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
30                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
31                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
32                 
33                 vector<string> myArray;
34                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
35                 return myArray;
36         }
37         catch(exception& e) {
38                 m->errorOut(e, "UnifracUnweightedCommand", "setParameters");
39                 exit(1);
40         }
41 }
42 //**********************************************************************************************************************
43 string UnifracUnweightedCommand::getHelpString(){       
44         try {
45                 string helpString = "";
46                 helpString += "The unifrac.unweighted command parameters are tree, group, name, count, groups, iters, distance, processors, root and random.  tree parameter is required unless you have valid current tree file.\n";
47                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n";
48                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
49                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n";
50                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n";
51                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
52                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
53                 helpString += "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n";
54         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
55         helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results of the subsampling, as well as a consensus tree built from these trees. Default=F.\n";
56                 helpString += "Example unifrac.unweighted(groups=A-B-C, iters=500).\n";
57                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
58                 helpString += "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual.\n";
59                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
60                 return helpString;
61         }
62         catch(exception& e) {
63                 m->errorOut(e, "UnifracUnweightedCommand", "getHelpString");
64                 exit(1);
65         }
66 }
67 //**********************************************************************************************************************
68 string UnifracUnweightedCommand::getOutputFileNameTag(string type, string inputName=""){        
69         try {
70         string outputFileName = "";
71                 map<string, vector<string> >::iterator it;
72         
73         //is this a type this command creates
74         it = outputTypes.find(type);
75         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
76         else {
77             if (type == "unweighted")            {   outputFileName =  "unweighted";   }
78             else if (type == "uwsummary")        {   outputFileName =  "uwsummary";   }
79             else if (type == "phylip")           {   outputFileName =  "dist";   }
80             else if (type == "column")           {   outputFileName =  "dist";   }
81             else if (type == "tree")             {   outputFileName =  "tre";   }
82             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
83         }
84         return outputFileName;
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "UnifracUnweightedCommand", "getOutputFileNameTag");
88                 exit(1);
89         }
90 }
91
92 //**********************************************************************************************************************
93 UnifracUnweightedCommand::UnifracUnweightedCommand(){   
94         try {
95                 abort = true; calledHelp = true; 
96                 setParameters();
97                 vector<string> tempOutNames;
98                 outputTypes["unweighted"] = tempOutNames;
99                 outputTypes["uwsummary"] = tempOutNames;
100                 outputTypes["phylip"] = tempOutNames;
101                 outputTypes["column"] = tempOutNames;
102         outputTypes["tree"] = tempOutNames;
103         }
104         catch(exception& e) {
105                 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
106                 exit(1);
107         }
108 }
109 /***********************************************************/
110 UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
111         try {
112                 abort = false; calledHelp = false;   
113                 
114                         
115                 //allow user to run help
116                 if(option == "help") { help(); abort = true; calledHelp = true; }
117                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
118                 
119                 else {
120                         vector<string> myArray = setParameters();
121                         
122                         OptionParser parser(option);
123                         map<string,string> parameters = parser.getParameters();
124                         map<string,string>::iterator it;
125                         
126                         ValidParameters validParameter;
127                 
128                         //check to make sure all parameters are valid for command
129                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
130                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
131                         }
132                         
133                         //initialize outputTypes
134                         vector<string> tempOutNames;
135                         outputTypes["unweighted"] = tempOutNames;
136                         outputTypes["uwsummary"] = tempOutNames;
137                         outputTypes["phylip"] = tempOutNames;
138                         outputTypes["column"] = tempOutNames;
139             outputTypes["tree"] = tempOutNames;
140                         
141                         //if the user changes the input directory command factory will send this info to us in the output parameter 
142                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
143                         if (inputDir == "not found"){   inputDir = "";          }
144                         else {
145                                 string path;
146                                 it = parameters.find("tree");
147                                 //user has given a template file
148                                 if(it != parameters.end()){ 
149                                         path = m->hasPath(it->second);
150                                         //if the user has not given a path then, add inputdir. else leave path alone.
151                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
152                                 }
153                                 
154                                 it = parameters.find("group");
155                                 //user has given a template file
156                                 if(it != parameters.end()){ 
157                                         path = m->hasPath(it->second);
158                                         //if the user has not given a path then, add inputdir. else leave path alone.
159                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
160                                 }
161                                 
162                                 it = parameters.find("name");
163                                 //user has given a template file
164                                 if(it != parameters.end()){ 
165                                         path = m->hasPath(it->second);
166                                         //if the user has not given a path then, add inputdir. else leave path alone.
167                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
168                                 }
169                 
170                 it = parameters.find("count");
171                                 //user has given a template file
172                                 if(it != parameters.end()){ 
173                                         path = m->hasPath(it->second);
174                                         //if the user has not given a path then, add inputdir. else leave path alone.
175                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
176                                 }
177                         }
178                         
179             //check for required parameters
180                         treefile = validParameter.validFile(parameters, "tree", true);
181                         if (treefile == "not open") { abort = true; }
182                         else if (treefile == "not found") {                             //if there is a current design file, use it
183                                 treefile = m->getTreeFile(); 
184                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
185                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
186                         }else { m->setTreeFile(treefile); }     
187                         
188                         //check for required parameters
189                         groupfile = validParameter.validFile(parameters, "group", true);
190                         if (groupfile == "not open") { abort = true; }
191                         else if (groupfile == "not found") { groupfile = ""; }
192                         else { m->setGroupFile(groupfile); }
193                         
194                         namefile = validParameter.validFile(parameters, "name", true);
195                         if (namefile == "not open") { namefile = ""; abort = true; }
196                         else if (namefile == "not found") { namefile = ""; }
197                         else { m->setNameFile(namefile); }
198             
199             countfile = validParameter.validFile(parameters, "count", true);
200                         if (countfile == "not open") { countfile = ""; abort = true; }
201                         else if (countfile == "not found") { countfile = "";  } 
202                         else { m->setCountTableFile(countfile); }
203             
204             if ((namefile != "") && (countfile != "")) {
205                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
206             }
207                         
208             if ((groupfile != "") && (countfile != "")) {
209                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
210             }
211                         
212                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
213                         
214                         //check for optional parameter and set defaults
215                         // ...at some point should added some additional type checking...
216                         groups = validParameter.validFile(parameters, "groups", false);                 
217                         if (groups == "not found") { groups = ""; }
218                         else { 
219                                 m->splitAtDash(groups, Groups);
220                                 m->setGroups(Groups);
221                         }
222                                 
223                         itersString = validParameter.validFile(parameters, "iters", false);                             if (itersString == "not found") { itersString = "1000"; }
224                         m->mothurConvert(itersString, iters); 
225                         
226                         string temp = validParameter.validFile(parameters, "distance", false);                  
227                         if (temp == "not found") { phylip = false; outputForm = ""; }
228                         else{
229                 if (temp=="phylip") { temp = "lt"; }
230                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
231                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
232                         }
233                         
234                         temp = validParameter.validFile(parameters, "random", false);                                   if (temp == "not found") { temp = "f"; }
235                         random = m->isTrue(temp);
236                         
237                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
238                         includeRoot = m->isTrue(temp);
239                         
240                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
241                         m->setProcessors(temp);
242                         m->mothurConvert(temp, processors); 
243                         
244             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
245                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
246             else {  
247                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
248                 else { subsample = false; }
249             }
250                         
251             if (!subsample) { subsampleIters = 0;   }
252             else { subsampleIters = iters;          }
253             
254             temp = validParameter.validFile(parameters, "consensus", false);                                    if (temp == "not found") { temp = "F"; }
255                         consensus = m->isTrue(temp);
256             
257                         if (subsample && random) {  m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true;  } 
258             if (countfile == "") { if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } }
259             else {  
260                 CountTable testCt; 
261                 if ((!testCt.testGroups(countfile)) && (subsample)) {
262                     m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;  
263                 }
264             }
265             if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
266             if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
267
268                         if (!random) {  iters = 0;  } //turn off random calcs
269                         
270                         //if user selects distance = true and no groups it won't calc the pairwise
271                         if ((phylip) && (Groups.size() == 0)) {
272                                 groups = "all";
273                                 m->splitAtDash(groups, Groups);
274                                 m->setGroups(Groups);
275                         }
276                         
277                         if (countfile=="") {
278                 if (namefile == "") {
279                     vector<string> files; files.push_back(treefile);
280                     parser.getNameFile(files);
281                 } 
282             }
283                 }
284                 
285         }
286         catch(exception& e) {
287                 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
288                 exit(1);
289         }
290 }
291
292 /***********************************************************/
293 int UnifracUnweightedCommand::execute() {
294         try {
295                 
296                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
297                 
298                 m->setTreeFile(treefile);
299                 
300                 TreeReader* reader;
301         if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
302         else { reader = new TreeReader(treefile, countfile); }
303         T = reader->getTrees();
304         ct = T[0]->getCountTable();
305         delete reader;
306         
307                 sumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("uwsummary");
308                 outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
309                 m->openOutputFile(sumFile, outSum);
310                 
311                 SharedUtil util;
312                 Groups = m->getGroups();
313                 vector<string> namesGroups = ct->getNamesOfGroups();
314                 util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted");        //sets the groups the user wants to analyze
315                 
316                 Unweighted unweighted(includeRoot);
317                 
318                 int start = time(NULL);
319         
320         //set or check size
321         if (subsample) {
322             //user has not set size, set size = smallest samples size
323             if (subsampleSize == -1) { 
324                 vector<string> temp; temp.push_back(Groups[0]);
325                 subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
326                 for (int i = 1; i < Groups.size(); i++) {
327                     int thisSize = ct->getGroupCount(Groups[i]);
328                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
329                 }
330                 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
331             }else { //eliminate any too small groups
332                 vector<string> newGroups = Groups;
333                 Groups.clear();
334                 for (int i = 0; i < newGroups.size(); i++) {
335                     int thisSize = ct->getGroupCount(newGroups[i]);
336                     
337                     if (thisSize >= subsampleSize) {    Groups.push_back(newGroups[i]); }
338                     else {   m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
339                 } 
340                 m->setGroups(Groups);
341             }
342         }
343                 
344         util.getCombos(groupComb, Groups, numComp);
345                 m->setGroups(Groups);
346         
347                 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
348         
349                 if (numComp < processors) { processors = numComp;  }
350         
351         if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
352                 
353                 outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "UWScore" <<'\t';
354                 m->mothurOut("Tree#\tGroups\tUWScore\t");
355                 if (random) { outSum << "UWSig"; m->mothurOut("UWSig"); }
356                 outSum << endl; m->mothurOutEndLine();
357          
358                 //get pscores for users trees
359                 for (int i = 0; i < T.size(); i++) {
360                         if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } return 0; }
361                         
362             counter = 0;
363                         
364                         if (random)  {  
365                                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("unweighted"), itersString);
366                                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("unweighted"));
367                                 outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("unweighted"));
368                         }
369                         
370                         
371                         //get unweighted for users tree
372                         rscoreFreq.resize(numComp);  
373                         rCumul.resize(numComp);  
374                         utreeScores.resize(numComp);  
375                         UWScoreSig.resize(numComp); 
376             
377             vector<double> userData; userData.resize(numComp,0);  //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
378
379                         userData = unweighted.getValues(T[i], processors, outputDir);  //userData[0] = unweightedscore
380                 
381                         if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close();  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);  }return 0; }
382                         
383                         //output scores for each combination
384                         for(int k = 0; k < numComp; k++) {
385                                 //saves users score
386                                 utreeScores[k].push_back(userData[k]);
387                                 
388                                 //add users score to validscores
389                                 validScores[userData[k]] = userData[k];
390                 
391                 if (!random) { UWScoreSig[k].push_back(0.0);    }
392                         }
393             
394             if (random) {  runRandomCalcs(T[i], userData);  }
395                         
396                         if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } return 0;  }
397             
398             int startSubsample = time(NULL);
399             
400             //subsample loop
401             vector< vector<double> > calcDistsTotals;  //each iter, each groupCombos dists. this will be used to make .dist files
402             for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
403                 if (m->control_pressed) { break; }
404                 
405                 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
406                 CountTable* newCt = new CountTable();
407                  
408                 //uses method of setting groups to doNotIncludeMe
409                 SubSample sample;
410                 Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
411                 
412                 //call new weighted function
413                 vector<double> iterData; iterData.resize(numComp,0);
414                 Unweighted thisUnweighted(includeRoot);
415                 iterData = thisUnweighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
416         
417                 //save data to make ave dist, std dist
418                 calcDistsTotals.push_back(iterData);
419                 
420                 delete newCt;
421                 delete subSampleTree;
422                 
423                 if((thisIter+1) % 100 == 0){    m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine();              }
424             }
425             if (subsample) { m->mothurOut("It took " + toString(time(NULL) - startSubsample) + " secs to run the subsampling."); m->mothurOutEndLine(); }
426             
427             if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0;  }
428
429             if (subsample) {  getAverageSTDMatrices(calcDistsTotals, i); }
430             if (consensus) {  getConsensusTrees(calcDistsTotals, i);  }
431             
432             //print output files
433                         printUWSummaryFile(i);
434                         if (random)  {  printUnweightedFile();  delete output;  }
435                         if (phylip) {   createPhylipFile(i);            }
436                         
437                         rscoreFreq.clear(); 
438                         rCumul.clear();  
439                         validScores.clear(); 
440                         utreeScores.clear();  
441                         UWScoreSig.clear(); 
442                 }
443                 
444
445                 outSum.close();
446                 delete ct; 
447                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
448                 
449                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }     return 0; }
450                 
451                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
452                 
453                 //set phylip file as new current phylipfile
454                 string current = "";
455                 itTypes = outputTypes.find("phylip");
456                 if (itTypes != outputTypes.end()) {
457                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
458                 }
459                 
460                 //set column file as new current columnfile
461                 itTypes = outputTypes.find("column");
462                 if (itTypes != outputTypes.end()) {
463                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
464                 }
465                 
466                 m->mothurOutEndLine();
467                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
468                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
469                 m->mothurOutEndLine();
470                 
471                 return 0;
472                 
473         }
474         catch(exception& e) {
475                 m->errorOut(e, "UnifracUnweightedCommand", "execute");
476                 exit(1);
477         }
478 }
479 /**************************************************************************************************/
480 int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
481         try {
482         //we need to find the average distance and standard deviation for each groups distance
483         
484         //finds sum
485         vector<double> averages; averages.resize(numComp, 0); 
486         for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
487             for (int i = 0; i < dists[thisIter].size(); i++) {  
488                 averages[i] += dists[thisIter][i];
489             }
490         }
491         
492         //finds average.
493         for (int i = 0; i < averages.size(); i++) {  averages[i] /= (float) subsampleIters; }
494         
495         //find standard deviation
496         vector<double> stdDev; stdDev.resize(numComp, 0);
497         
498         for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
499             for (int j = 0; j < dists[thisIter].size(); j++) {
500                 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
501             }
502         }
503         for (int i = 0; i < stdDev.size(); i++) {  
504             stdDev[i] /= (float) subsampleIters; 
505             stdDev[i] = sqrt(stdDev[i]);
506         }
507         
508         //make matrix with scores in it
509         vector< vector<double> > avedists;      avedists.resize(m->getNumGroups());
510         for (int i = 0; i < m->getNumGroups(); i++) {
511             avedists[i].resize(m->getNumGroups(), 0.0);
512         }
513         
514         //make matrix with scores in it
515         vector< vector<double> > stddists;      stddists.resize(m->getNumGroups());
516         for (int i = 0; i < m->getNumGroups(); i++) {
517             stddists[i].resize(m->getNumGroups(), 0.0);
518         }
519         
520         //flip it so you can print it
521         int count = 0;
522         for (int r=0; r<m->getNumGroups(); r++) { 
523             for (int l = 0; l < r; l++) {
524                 avedists[r][l] = averages[count];
525                 avedists[l][r] = averages[count];
526                 stddists[r][l] = stdDev[count];
527                 stddists[l][r] = stdDev[count];
528                 count++;
529             }
530         }
531         
532         string aveFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".unweighted.ave." + getOutputFileNameTag("phylip");
533         if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);  }
534         else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName);  }
535         ofstream out;
536         m->openOutputFile(aveFileName, out);
537         
538         string stdFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".unweighted.std." + getOutputFileNameTag("phylip");
539         if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
540         else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
541         ofstream outStd;
542         m->openOutputFile(stdFileName, outStd);
543         
544         if ((outputForm == "lt") || (outputForm == "square")) {
545             //output numSeqs
546             out << m->getNumGroups() << endl;
547             outStd << m->getNumGroups() << endl;
548         }
549         
550         //output to file
551         for (int r=0; r<m->getNumGroups(); r++) { 
552             //output name
553             string name = (m->getGroups())[r];
554             if (name.length() < 10) { //pad with spaces to make compatible
555                 while (name.length() < 10) {  name += " ";  }
556             }
557             
558             if (outputForm == "lt") {
559                 out << name << '\t';
560                 outStd << name << '\t';
561                 
562                 //output distances
563                 for (int l = 0; l < r; l++) {   out  << avedists[r][l] << '\t';  outStd  << stddists[r][l] << '\t';}
564                 out << endl;  outStd << endl;
565             }else if (outputForm == "square") {
566                 out << name << '\t';
567                 outStd << name << '\t';
568                 
569                 //output distances
570                 for (int l = 0; l < m->getNumGroups(); l++) {   out  << avedists[r][l] << '\t'; outStd  << stddists[r][l] << '\t'; }
571                 out << endl; outStd << endl;
572             }else{
573                 //output distances
574                 for (int l = 0; l < r; l++) {   
575                     string otherName = (m->getGroups())[l];
576                     if (otherName.length() < 10) { //pad with spaces to make compatible
577                         while (otherName.length() < 10) {  otherName += " ";  }
578                     }
579                     
580                     out  << name << '\t' << otherName << avedists[r][l] << endl;  
581                     outStd  << name << '\t' << otherName << stddists[r][l] << endl; 
582                 }
583             }
584         }
585         out.close();
586         outStd.close();
587         
588         return 0;
589     }
590         catch(exception& e) {
591                 m->errorOut(e, "UnifracUnweightedCommand", "getAverageSTDMatrices");
592                 exit(1);
593         }
594 }
595
596 /**************************************************************************************************/
597 int UnifracUnweightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
598         try {
599         
600         //used in tree constructor 
601         m->runParse = false;
602         
603         //create treemap class from groupmap for tree class to use
604         CountTable newCt;
605         set<string> nameMap;
606         map<string, string> groupMap;
607         set<string> gps;
608         for (int i = 0; i < m->getGroups().size(); i++) { 
609             nameMap.insert(m->getGroups()[i]); 
610             gps.insert(m->getGroups()[i]); 
611             groupMap[m->getGroups()[i]] = m->getGroups()[i];
612         }
613         newCt.createTable(nameMap, groupMap, gps);
614         
615         //clear  old tree names if any
616         m->Treenames.clear();
617         
618         //fills globaldatas tree names
619         m->Treenames = m->getGroups();
620         
621         vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
622         
623         if (m->control_pressed) { return 0; }
624         
625         Consensus con;
626         Tree* conTree = con.getTree(newTrees);
627         
628         //create a new filename
629         string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.cons." + getOutputFileNameTag("tree");                             
630         outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
631         ofstream outTree;
632         m->openOutputFile(conFile, outTree);
633         
634         if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
635         outTree.close();
636         
637         return 0;
638         
639     }
640         catch(exception& e) {
641                 m->errorOut(e, "UnifracUnweightedCommand", "getConsensusTrees");
642                 exit(1);
643         }
644 }
645 /**************************************************************************************************/
646
647 vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
648         try {
649         
650         vector<Tree*> trees;
651         
652         //create a new filename
653         string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.all." + getOutputFileNameTag("tree");                           
654         outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
655         
656         ofstream outAll;
657         m->openOutputFile(outputFile, outAll);
658         
659         
660         for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
661             
662             if (m->control_pressed) { break; }
663             
664             //make matrix with scores in it
665             vector< vector<double> > sims;      sims.resize(m->getNumGroups());
666             for (int j = 0; j < m->getNumGroups(); j++) {
667                 sims[j].resize(m->getNumGroups(), 0.0);
668             }
669             
670             int count = 0;
671                         for (int r=0; r<m->getNumGroups(); r++) { 
672                                 for (int l = 0; l < r; l++) {
673                     double sim = -(dists[i][count]-1.0);
674                                         sims[r][l] = sim;
675                                         sims[l][r] = sim;
676                                         count++;
677                                 }
678                         }
679             
680             //create tree
681             Tree* tempTree = new Tree(&myct, sims);
682             tempTree->assembleTree();
683             
684             trees.push_back(tempTree);
685             
686             //print tree
687             tempTree->print(outAll);
688         }
689         
690         outAll.close();
691         
692         if (m->control_pressed) {  for (int i = 0; i < trees.size(); i++) {  delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
693         
694         return trees;
695     }
696         catch(exception& e) {
697                 m->errorOut(e, "UnifracUnweightedCommand", "buildTrees");
698                 exit(1);
699         }
700 }
701 /**************************************************************************************************/
702
703 int UnifracUnweightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
704         try {
705         vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
706         
707         Unweighted unweighted(includeRoot);
708         
709         //get unweighted scores for random trees - if random is false iters = 0
710         for (int j = 0; j < iters; j++) {
711             
712             //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison
713             randomData = unweighted.getValues(thisTree, "", "", processors, outputDir);
714             
715             if (m->control_pressed) { return 0; }
716                         
717             for(int k = 0; k < numComp; k++) {  
718                 //add trees unweighted score to map of scores
719                 map<float,float>::iterator it = rscoreFreq[k].find(randomData[k]);
720                 if (it != rscoreFreq[k].end()) {//already have that score
721                     rscoreFreq[k][randomData[k]]++;
722                 }else{//first time we have seen this score
723                     rscoreFreq[k][randomData[k]] = 1;
724                 }
725                                 
726                 //add randoms score to validscores
727                 validScores[randomData[k]] = randomData[k];
728             }
729         }
730         
731         for(int a = 0; a < numComp; a++) {
732             float rcumul = 1.0000;
733     
734             //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
735             for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
736                 //make rscoreFreq map and rCumul
737                 map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
738                 rCumul[a][it->first] = rcumul;
739                 //get percentage of random trees with that info
740                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
741                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
742             }
743             UWScoreSig[a].push_back(rCumul[a][usersScores[a]]);
744         }
745         
746         return 0;
747         }
748         catch(exception& e) {
749                 m->errorOut(e, "UnifracUnweightedCommand", "runRandomCalcs");
750                 exit(1);
751         }
752 }
753 /***********************************************************/
754 void UnifracUnweightedCommand::printUnweightedFile() {
755         try {
756                 vector<double> data;
757                 vector<string> tags;
758                 
759                 tags.push_back("Score");
760                 tags.push_back("RandFreq"); tags.push_back("RandCumul");
761                         
762                 for(int a = 0; a < numComp; a++) {
763                         output->initFile(groupComb[a], tags);
764                         //print each line
765                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
766                                 data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);                                             
767                                 output->output(data);
768                                 data.clear();
769                         } 
770                         output->resetFile();
771                 }
772         }
773         catch(exception& e) {
774                 m->errorOut(e, "UnifracUnweightedCommand", "printUnweightedFile");
775                 exit(1);
776         }
777 }
778
779 /***********************************************************/
780 void UnifracUnweightedCommand::printUWSummaryFile(int i) {
781         try {
782                                 
783                 //format output
784                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
785                         
786                 //print each line
787
788                 for(int a = 0; a < numComp; a++) {
789                         outSum << i+1 << '\t';
790                         m->mothurOut(toString(i+1) + "\t");
791                         
792                         if (random) {
793                                 if (UWScoreSig[a][0] > (1/(float)iters)) {
794                                         outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
795                                         cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl; 
796                                         m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0])  + "\t" + toString(UWScoreSig[a][0])+ "\n"); 
797                                 }else {
798                                         outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
799                                         cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
800                                         m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0])  + "\t<" + toString((1/float(iters))) + "\n"); 
801                                 }
802                         }else{
803                                 outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0]  << endl;
804                                 cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0]  << endl; 
805                                 m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0]) + "\n");
806                         }
807                 }
808                 
809         }
810         catch(exception& e) {
811                 m->errorOut(e, "UnifracUnweightedCommand", "printUWSummaryFile");
812                 exit(1);
813         }
814 }
815 /***********************************************************/
816 void UnifracUnweightedCommand::createPhylipFile(int i) {
817         try {
818                 string phylipFileName;
819                 if ((outputForm == "lt") || (outputForm == "square")) {
820                         phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted.phylip." + getOutputFileNameTag("phylip");
821                         outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
822                 }else { //column
823                         phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted.column." + getOutputFileNameTag("column");
824                         outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
825                 }
826                 
827                 ofstream out;
828                 m->openOutputFile(phylipFileName, out);
829                 
830                 if ((outputForm == "lt") || (outputForm == "square")) {
831                         //output numSeqs
832                         out << m->getNumGroups() << endl;
833                 }
834                 
835                 //make matrix with scores in it
836                 vector< vector<float> > dists;  dists.resize(m->getNumGroups());
837                 for (int i = 0; i < m->getNumGroups(); i++) {
838                         dists[i].resize(m->getNumGroups(), 0.0);
839                 }
840                 
841                 //flip it so you can print it
842                 int count = 0;
843                 for (int r=0; r<m->getNumGroups(); r++) { 
844                         for (int l = 0; l < r; l++) {
845                                 dists[r][l] = utreeScores[count][0];
846                                 dists[l][r] = utreeScores[count][0];
847                                 count++;
848                         }
849                 }
850                 
851                 //output to file
852                 for (int r=0; r<m->getNumGroups(); r++) { 
853                         //output name
854                         string name = (m->getGroups())[r];
855                         if (name.length() < 10) { //pad with spaces to make compatible
856                                 while (name.length() < 10) {  name += " ";  }
857                         }
858                         
859                         if (outputForm == "lt") {
860                                 out << name << '\t';
861                         
862                                 //output distances
863                                 for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
864                                 out << endl;
865                         }else if (outputForm == "square") {
866                                 out << name << '\t';
867                                 
868                                 //output distances
869                                 for (int l = 0; l < m->getNumGroups(); l++) {   out << dists[r][l] << '\t';  }
870                                 out << endl;
871                         }else{
872                                 //output distances
873                                 for (int l = 0; l < r; l++) {   
874                                         string otherName = (m->getGroups())[l];
875                                         if (otherName.length() < 10) { //pad with spaces to make compatible
876                                                 while (otherName.length() < 10) {  otherName += " ";  }
877                                         }
878                                         
879                                         out  << name << '\t' << otherName << dists[r][l] << endl;  
880                                 }
881                         }
882                 }
883                 out.close();
884         }
885         catch(exception& e) {
886                 m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile");
887                 exit(1);
888         }
889 }
890 /***********************************************************/
891
892
893
894