]> git.donarmstrong.com Git - mothur.git/blob - unifracunweightedcommand.cpp
19a2ece3bb4253e6da6199515d9c203d2d656400
[mothur.git] / unifracunweightedcommand.cpp
1 /*
2  *  unifracunweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracunweightedcommand.h"
11 #include "treereader.h"
12 #include "subsample.h"
13 #include "consensus.h"
14
15 //**********************************************************************************************************************
16 vector<string> UnifracUnweightedCommand::setParameters(){       
17         try {
18                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","unweighted-uwsummary",false,true,true); parameters.push_back(ptree);
19         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
20         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
22                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
23                 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25                 CommandParameter prandom("random", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prandom);
26                 CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "","phylip-column",false,false); parameters.push_back(pdistance);
27         CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
28         CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "","tree",false,false); parameters.push_back(pconsensus);
29         CommandParameter proot("root", "Boolean", "F", "", "", "", "","",false,false); parameters.push_back(proot);
30                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
31                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
32                 
33                 vector<string> myArray;
34                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
35                 return myArray;
36         }
37         catch(exception& e) {
38                 m->errorOut(e, "UnifracUnweightedCommand", "setParameters");
39                 exit(1);
40         }
41 }
42 //**********************************************************************************************************************
43 string UnifracUnweightedCommand::getHelpString(){       
44         try {
45                 string helpString = "";
46                 helpString += "The unifrac.unweighted command parameters are tree, group, name, count, groups, iters, distance, processors, root and random.  tree parameter is required unless you have valid current tree file.\n";
47                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n";
48                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
49                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n";
50                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n";
51                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
52                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
53                 helpString += "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n";
54         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
55         helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results of the subsampling, as well as a consensus tree built from these trees. Default=F.\n";
56                 helpString += "Example unifrac.unweighted(groups=A-B-C, iters=500).\n";
57                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
58                 helpString += "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual.\n";
59                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
60                 return helpString;
61         }
62         catch(exception& e) {
63                 m->errorOut(e, "UnifracUnweightedCommand", "getHelpString");
64                 exit(1);
65         }
66 }
67 //**********************************************************************************************************************
68 string UnifracUnweightedCommand::getOutputPattern(string type) {
69     try {
70         string pattern = "";
71         if (type == "unweighted")            {  pattern = "[filename],unweighted-[filename],[tag],unweighted";   }
72         else if (type == "uwsummary")        {  pattern = "[filename],uwsummary";   }
73         else if (type == "phylip")           {  pattern = "[filename],[tag],[tag2],dist";   }
74         else if (type == "column")           {  pattern = "[filename],[tag],[tag2],dist";   }
75         else if (type == "tree")             {  pattern = "[filename],[tag],[tag2],tre";   }
76         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
77         
78         return pattern;
79     }
80     catch(exception& e) {
81         m->errorOut(e, "UnifracUnweightedCommand", "getOutputPattern");
82         exit(1);
83     }
84 }
85 //**********************************************************************************************************************
86 UnifracUnweightedCommand::UnifracUnweightedCommand(){   
87         try {
88                 abort = true; calledHelp = true; 
89                 setParameters();
90                 vector<string> tempOutNames;
91                 outputTypes["unweighted"] = tempOutNames;
92                 outputTypes["uwsummary"] = tempOutNames;
93                 outputTypes["phylip"] = tempOutNames;
94                 outputTypes["column"] = tempOutNames;
95         outputTypes["tree"] = tempOutNames;
96         }
97         catch(exception& e) {
98                 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
99                 exit(1);
100         }
101 }
102 /***********************************************************/
103 UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
104         try {
105                 abort = false; calledHelp = false;   
106                 
107                         
108                 //allow user to run help
109                 if(option == "help") { help(); abort = true; calledHelp = true; }
110                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
111                 
112                 else {
113                         vector<string> myArray = setParameters();
114                         
115                         OptionParser parser(option);
116                         map<string,string> parameters = parser.getParameters();
117                         map<string,string>::iterator it;
118                         
119                         ValidParameters validParameter;
120                 
121                         //check to make sure all parameters are valid for command
122                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
123                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
124                         }
125                         
126                         //initialize outputTypes
127                         vector<string> tempOutNames;
128                         outputTypes["unweighted"] = tempOutNames;
129                         outputTypes["uwsummary"] = tempOutNames;
130                         outputTypes["phylip"] = tempOutNames;
131                         outputTypes["column"] = tempOutNames;
132             outputTypes["tree"] = tempOutNames;
133                         
134                         //if the user changes the input directory command factory will send this info to us in the output parameter 
135                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
136                         if (inputDir == "not found"){   inputDir = "";          }
137                         else {
138                                 string path;
139                                 it = parameters.find("tree");
140                                 //user has given a template file
141                                 if(it != parameters.end()){ 
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
145                                 }
146                                 
147                                 it = parameters.find("group");
148                                 //user has given a template file
149                                 if(it != parameters.end()){ 
150                                         path = m->hasPath(it->second);
151                                         //if the user has not given a path then, add inputdir. else leave path alone.
152                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
153                                 }
154                                 
155                                 it = parameters.find("name");
156                                 //user has given a template file
157                                 if(it != parameters.end()){ 
158                                         path = m->hasPath(it->second);
159                                         //if the user has not given a path then, add inputdir. else leave path alone.
160                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
161                                 }
162                 
163                 it = parameters.find("count");
164                                 //user has given a template file
165                                 if(it != parameters.end()){ 
166                                         path = m->hasPath(it->second);
167                                         //if the user has not given a path then, add inputdir. else leave path alone.
168                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
169                                 }
170                         }
171                         
172             //check for required parameters
173                         treefile = validParameter.validFile(parameters, "tree", true);
174                         if (treefile == "not open") { abort = true; }
175                         else if (treefile == "not found") {                             //if there is a current design file, use it
176                                 treefile = m->getTreeFile(); 
177                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
178                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
179                         }else { m->setTreeFile(treefile); }     
180                         
181                         //check for required parameters
182                         groupfile = validParameter.validFile(parameters, "group", true);
183                         if (groupfile == "not open") { abort = true; }
184                         else if (groupfile == "not found") { groupfile = ""; }
185                         else { m->setGroupFile(groupfile); }
186                         
187                         namefile = validParameter.validFile(parameters, "name", true);
188                         if (namefile == "not open") { namefile = ""; abort = true; }
189                         else if (namefile == "not found") { namefile = ""; }
190                         else { m->setNameFile(namefile); }
191             
192             countfile = validParameter.validFile(parameters, "count", true);
193                         if (countfile == "not open") { countfile = ""; abort = true; }
194                         else if (countfile == "not found") { countfile = "";  } 
195                         else { m->setCountTableFile(countfile); }
196             
197             if ((namefile != "") && (countfile != "")) {
198                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
199             }
200                         
201             if ((groupfile != "") && (countfile != "")) {
202                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
203             }
204                         
205                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
206                         
207                         //check for optional parameter and set defaults
208                         // ...at some point should added some additional type checking...
209                         groups = validParameter.validFile(parameters, "groups", false);                 
210                         if (groups == "not found") { groups = ""; }
211                         else { 
212                                 m->splitAtDash(groups, Groups);
213                                 m->setGroups(Groups);
214                         }
215                                 
216                         itersString = validParameter.validFile(parameters, "iters", false);                             if (itersString == "not found") { itersString = "1000"; }
217                         m->mothurConvert(itersString, iters); 
218                         
219                         string temp = validParameter.validFile(parameters, "distance", false);                  
220                         if (temp == "not found") { phylip = false; outputForm = ""; }
221                         else{
222                 if (temp=="phylip") { temp = "lt"; }
223                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
224                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
225                         }
226                         
227                         temp = validParameter.validFile(parameters, "random", false);                                   if (temp == "not found") { temp = "f"; }
228                         random = m->isTrue(temp);
229                         
230                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
231                         includeRoot = m->isTrue(temp);
232                         
233                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
234                         m->setProcessors(temp);
235                         m->mothurConvert(temp, processors); 
236                         
237             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
238                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
239             else {  
240                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
241                 else { subsample = false; }
242             }
243                         
244             if (!subsample) { subsampleIters = 0;   }
245             else { subsampleIters = iters;          }
246             
247             temp = validParameter.validFile(parameters, "consensus", false);                                    if (temp == "not found") { temp = "F"; }
248                         consensus = m->isTrue(temp);
249             
250                         if (subsample && random) {  m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true;  } 
251             if (countfile == "") { if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } }
252             else {  
253                 CountTable testCt; 
254                 if ((!testCt.testGroups(countfile)) && (subsample)) {
255                     m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;  
256                 }
257             }
258             if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
259             if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
260
261                         if (!random) {  iters = 0;  } //turn off random calcs
262                         
263                         //if user selects distance = true and no groups it won't calc the pairwise
264                         if ((phylip) && (Groups.size() == 0)) {
265                                 groups = "all";
266                                 m->splitAtDash(groups, Groups);
267                                 m->setGroups(Groups);
268                         }
269                         
270                         if (countfile=="") {
271                 if (namefile == "") {
272                     vector<string> files; files.push_back(treefile);
273                     parser.getNameFile(files);
274                 } 
275             }
276                 }
277                 
278         }
279         catch(exception& e) {
280                 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
281                 exit(1);
282         }
283 }
284
285 /***********************************************************/
286 int UnifracUnweightedCommand::execute() {
287         try {
288                 
289                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
290                 
291                 m->setTreeFile(treefile);
292                 
293                 TreeReader* reader;
294         if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
295         else { reader = new TreeReader(treefile, countfile); }
296         T = reader->getTrees();
297         ct = T[0]->getCountTable();
298         delete reader;
299         
300         map<string, string> variables; 
301                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
302                 sumFile = getOutputFileName("uwsummary",variables);
303                 outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
304                 m->openOutputFile(sumFile, outSum);
305                 
306                 SharedUtil util;
307                 Groups = m->getGroups();
308                 vector<string> namesGroups = ct->getNamesOfGroups();
309                 util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted");        //sets the groups the user wants to analyze
310                 
311                 Unweighted unweighted(includeRoot);
312                 
313                 int start = time(NULL);
314         
315         //set or check size
316         if (subsample) {
317             //user has not set size, set size = smallest samples size
318             if (subsampleSize == -1) { 
319                 vector<string> temp; temp.push_back(Groups[0]);
320                 subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
321                 for (int i = 1; i < Groups.size(); i++) {
322                     int thisSize = ct->getGroupCount(Groups[i]);
323                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
324                 }
325                 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
326             }else { //eliminate any too small groups
327                 vector<string> newGroups = Groups;
328                 Groups.clear();
329                 for (int i = 0; i < newGroups.size(); i++) {
330                     int thisSize = ct->getGroupCount(newGroups[i]);
331                     
332                     if (thisSize >= subsampleSize) {    Groups.push_back(newGroups[i]); }
333                     else {   m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
334                 } 
335                 m->setGroups(Groups);
336             }
337         }
338                 
339         util.getCombos(groupComb, Groups, numComp);
340                 m->setGroups(Groups);
341         
342                 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
343         
344                 if (numComp < processors) { processors = numComp;  }
345         
346         if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
347                 
348                 outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "UWScore" <<'\t';
349                 m->mothurOut("Tree#\tGroups\tUWScore\t");
350                 if (random) { outSum << "UWSig"; m->mothurOut("UWSig"); }
351                 outSum << endl; m->mothurOutEndLine();
352          
353                 //get pscores for users trees
354                 for (int i = 0; i < T.size(); i++) {
355                         if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } return 0; }
356                         
357             counter = 0;
358                         
359                         if (random)  {  
360                 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
361                 variables["[tag]"] = toString(i+1);
362                 string unFileName = getOutputFileName("unweighted", variables);
363                                 output = new ColumnFile(unFileName, itersString);
364                                 outputNames.push_back(unFileName); outputTypes["unweighted"].push_back(unFileName);
365                         }
366                         
367                         
368                         //get unweighted for users tree
369                         rscoreFreq.resize(numComp);  
370                         rCumul.resize(numComp);  
371                         utreeScores.resize(numComp);  
372                         UWScoreSig.resize(numComp); 
373             
374             vector<double> userData; userData.resize(numComp,0);  //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
375
376                         userData = unweighted.getValues(T[i], processors, outputDir);  //userData[0] = unweightedscore
377                 
378                         if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close();  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);  }return 0; }
379                         
380                         //output scores for each combination
381                         for(int k = 0; k < numComp; k++) {
382                                 //saves users score
383                                 utreeScores[k].push_back(userData[k]);
384                                 
385                                 //add users score to validscores
386                                 validScores[userData[k]] = userData[k];
387                 
388                 if (!random) { UWScoreSig[k].push_back(0.0);    }
389                         }
390             
391             if (random) {  runRandomCalcs(T[i], userData);  }
392                         
393                         if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } return 0;  }
394             
395             int startSubsample = time(NULL);
396             
397             //subsample loop
398             vector< vector<double> > calcDistsTotals;  //each iter, each groupCombos dists. this will be used to make .dist files
399             for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
400                 if (m->control_pressed) { break; }
401                 
402                 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
403                 CountTable* newCt = new CountTable();
404                  
405                 //uses method of setting groups to doNotIncludeMe
406                 int sampleTime = 0;
407                 if (m->debug) { sampleTime = time(NULL); }
408                 SubSample sample;
409                 Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
410                 
411                 if (m->debug) { m->mothurOut("[DEBUG]: iter " + toString(thisIter) + " took " + toString(time(NULL) - sampleTime) + " seconds to sample tree.\n"); }
412                 
413                 //call new weighted function
414                 vector<double> iterData; iterData.resize(numComp,0);
415                 Unweighted thisUnweighted(includeRoot);
416                 iterData = thisUnweighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
417         
418                 //save data to make ave dist, std dist
419                 calcDistsTotals.push_back(iterData);
420                 
421                 delete newCt;
422                 delete subSampleTree;
423                 
424                 if((thisIter+1) % 100 == 0){    m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine();              }
425             }
426             if (subsample) { m->mothurOut("It took " + toString(time(NULL) - startSubsample) + " secs to run the subsampling."); m->mothurOutEndLine(); }
427             
428             if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0;  }
429
430             if (subsample) {  getAverageSTDMatrices(calcDistsTotals, i); }
431             if (consensus) {  getConsensusTrees(calcDistsTotals, i);  }
432             
433             //print output files
434                         printUWSummaryFile(i);
435                         if (random)  {  printUnweightedFile();  delete output;  }
436                         if (phylip) {   createPhylipFile(i);            }
437                         
438                         rscoreFreq.clear(); 
439                         rCumul.clear();  
440                         validScores.clear(); 
441                         utreeScores.clear();  
442                         UWScoreSig.clear(); 
443                 }
444                 
445
446                 outSum.close();
447                 delete ct; 
448                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
449                 
450                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }     return 0; }
451                 
452                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
453                 
454                 //set phylip file as new current phylipfile
455                 string current = "";
456                 itTypes = outputTypes.find("phylip");
457                 if (itTypes != outputTypes.end()) {
458                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
459                 }
460                 
461                 //set column file as new current columnfile
462                 itTypes = outputTypes.find("column");
463                 if (itTypes != outputTypes.end()) {
464                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
465                 }
466                 
467                 m->mothurOutEndLine();
468                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
469                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
470                 m->mothurOutEndLine();
471                 
472                 return 0;
473                 
474         }
475         catch(exception& e) {
476                 m->errorOut(e, "UnifracUnweightedCommand", "execute");
477                 exit(1);
478         }
479 }
480 /**************************************************************************************************/
481 int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
482         try {
483         //we need to find the average distance and standard deviation for each groups distance
484         //finds sum
485         vector<double> averages; //averages.resize(numComp, 0.0);
486         for (int i = 0; i < numComp; i++) { averages.push_back(0.0); }
487         
488         if (m->debug) { m->mothurOut("[DEBUG]: numcomparisons = " + toString(numComp) + ", subsampleIters = " + toString(subsampleIters) + "\n"); }
489         
490         for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
491             for (int i = 0; i < dists[thisIter].size(); i++) {  
492                 averages[i] += dists[thisIter][i];
493             }
494         }
495         
496         if (m->debug) { m->mothurOut("[DEBUG]: numcomparisons = " + toString(numComp) + ", subsampleIters = " + toString(subsampleIters) + "\n"); }
497         
498         //finds average.
499         for (int i = 0; i < averages.size(); i++) {  
500             averages[i] /= (float) subsampleIters; 
501             if (m->debug) { m->mothurOut("[DEBUG]: i = " + toString(i) + ", averages[i] = " + toString(averages[i]) + "\n"); }
502         }
503         
504         //find standard deviation
505         vector<double> stdDev; //stdDev.resize(numComp, 0.0);
506         for (int i = 0; i < numComp; i++) { stdDev.push_back(0.0); }
507         
508         for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
509             for (int j = 0; j < dists[thisIter].size(); j++) {
510                 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
511             }
512         }
513         for (int i = 0; i < stdDev.size(); i++) {  
514             stdDev[i] /= (float) subsampleIters; 
515             stdDev[i] = sqrt(stdDev[i]);
516             if (m->debug) { m->mothurOut("[DEBUG]: i = " + toString(i) + ", stdDev[i] = " + toString(stdDev[i]) + "\n"); }
517         }
518         
519         //make matrix with scores in it
520         vector< vector<double> > avedists;      //avedists.resize(m->getNumGroups());
521         for (int i = 0; i < m->getNumGroups(); i++) {
522             vector<double> temp;
523             for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
524             avedists.push_back(temp);
525         }
526         
527         //make matrix with scores in it
528         vector< vector<double> > stddists;      //stddists.resize(m->getNumGroups());
529         for (int i = 0; i < m->getNumGroups(); i++) {
530             vector<double> temp;
531             for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
532             //stddists[i].resize(m->getNumGroups(), 0.0);
533             stddists.push_back(temp);
534         }
535         
536         if (m->debug) { m->mothurOut("[DEBUG]: about to fill matrix.\n"); }
537         
538         //flip it so you can print it
539         int count = 0;
540         for (int r=0; r<m->getNumGroups(); r++) { 
541             for (int l = 0; l < r; l++) {
542                 avedists[r][l] = averages[count];
543                 avedists[l][r] = averages[count];
544                 stddists[r][l] = stdDev[count];
545                 stddists[l][r] = stdDev[count];
546                 count++;
547             }
548         }
549         
550         if (m->debug) { m->mothurOut("[DEBUG]: done filling matrix.\n"); }
551         
552         map<string, string> variables; 
553                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
554         variables["[tag]"] = toString(treeNum+1);
555         variables["[tag2]"] = "unweighted.ave";
556         string aveFileName = getOutputFileName("phylip",variables);
557         if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);  }
558         else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName);  }
559         ofstream out;
560         m->openOutputFile(aveFileName, out);
561         
562         variables["[tag2]"] = "unweighted.std";
563         string stdFileName = getOutputFileName("phylip",variables);
564         if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
565         else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
566         ofstream outStd;
567         m->openOutputFile(stdFileName, outStd);
568         
569         if ((outputForm == "lt") || (outputForm == "square")) {
570             //output numSeqs
571             out << m->getNumGroups() << endl;
572             outStd << m->getNumGroups() << endl;
573         }
574         
575         //output to file
576         for (int r=0; r<m->getNumGroups(); r++) { 
577             //output name
578             string name = (m->getGroups())[r];
579             if (name.length() < 10) { //pad with spaces to make compatible
580                 while (name.length() < 10) {  name += " ";  }
581             }
582             
583             if (outputForm == "lt") {
584                 out << name << '\t';
585                 outStd << name << '\t';
586                 
587                 //output distances
588                 for (int l = 0; l < r; l++) {   out  << avedists[r][l] << '\t';  outStd  << stddists[r][l] << '\t';}
589                 out << endl;  outStd << endl;
590             }else if (outputForm == "square") {
591                 out << name << '\t';
592                 outStd << name << '\t';
593                 
594                 //output distances
595                 for (int l = 0; l < m->getNumGroups(); l++) {   out  << avedists[r][l] << '\t'; outStd  << stddists[r][l] << '\t'; }
596                 out << endl; outStd << endl;
597             }else{
598                 //output distances
599                 for (int l = 0; l < r; l++) {   
600                     string otherName = (m->getGroups())[l];
601                     if (otherName.length() < 10) { //pad with spaces to make compatible
602                         while (otherName.length() < 10) {  otherName += " ";  }
603                     }
604                     
605                     out  << name << '\t' << otherName << avedists[r][l] << endl;  
606                     outStd  << name << '\t' << otherName << stddists[r][l] << endl; 
607                 }
608             }
609         }
610         out.close();
611         outStd.close();
612         
613         return 0;
614     }
615         catch(exception& e) {
616                 m->errorOut(e, "UnifracUnweightedCommand", "getAverageSTDMatrices");
617                 exit(1);
618         }
619 }
620
621 /**************************************************************************************************/
622 int UnifracUnweightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
623         try {
624         
625         //used in tree constructor 
626         m->runParse = false;
627         
628         //create treemap class from groupmap for tree class to use
629         CountTable newCt;
630         set<string> nameMap;
631         map<string, string> groupMap;
632         set<string> gps;
633         for (int i = 0; i < m->getGroups().size(); i++) { 
634             nameMap.insert(m->getGroups()[i]); 
635             gps.insert(m->getGroups()[i]); 
636             groupMap[m->getGroups()[i]] = m->getGroups()[i];
637         }
638         newCt.createTable(nameMap, groupMap, gps);
639         
640         //clear  old tree names if any
641         m->Treenames.clear();
642         
643         //fills globaldatas tree names
644         m->Treenames = m->getGroups();
645         
646         vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
647         
648         if (m->control_pressed) { return 0; }
649         
650         Consensus con;
651         Tree* conTree = con.getTree(newTrees);
652         
653         //create a new filename
654         map<string, string> variables; 
655                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
656         variables["[tag]"] = toString(treeNum+1);
657         variables["[tag2]"] = "unweighted.cons";
658         string conFile = getOutputFileName("tree",variables);                           
659         outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
660         ofstream outTree;
661         m->openOutputFile(conFile, outTree);
662         
663         if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
664         outTree.close();
665         
666         return 0;
667         
668     }
669         catch(exception& e) {
670                 m->errorOut(e, "UnifracUnweightedCommand", "getConsensusTrees");
671                 exit(1);
672         }
673 }
674 /**************************************************************************************************/
675
676 vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
677         try {
678         
679         vector<Tree*> trees;
680         
681         //create a new filename
682         map<string, string> variables; 
683                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
684         variables["[tag]"] = toString(treeNum+1);
685         variables["[tag2]"] = "unweighted.all";
686         string outputFile = getOutputFileName("tree",variables);                                
687         outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
688         
689         ofstream outAll;
690         m->openOutputFile(outputFile, outAll);
691         
692         
693         for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
694             
695             if (m->control_pressed) { break; }
696             
697             //make matrix with scores in it
698             vector< vector<double> > sims;      sims.resize(m->getNumGroups());
699             for (int j = 0; j < m->getNumGroups(); j++) {
700                 sims[j].resize(m->getNumGroups(), 0.0);
701             }
702             
703             int count = 0;
704                         for (int r=0; r<m->getNumGroups(); r++) { 
705                                 for (int l = 0; l < r; l++) {
706                     double sim = -(dists[i][count]-1.0);
707                                         sims[r][l] = sim;
708                                         sims[l][r] = sim;
709                                         count++;
710                                 }
711                         }
712             
713             //create tree
714             Tree* tempTree = new Tree(&myct, sims);
715             tempTree->assembleTree();
716             
717             trees.push_back(tempTree);
718             
719             //print tree
720             tempTree->print(outAll);
721         }
722         
723         outAll.close();
724         
725         if (m->control_pressed) {  for (int i = 0; i < trees.size(); i++) {  delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
726         
727         return trees;
728     }
729         catch(exception& e) {
730                 m->errorOut(e, "UnifracUnweightedCommand", "buildTrees");
731                 exit(1);
732         }
733 }
734 /**************************************************************************************************/
735
736 int UnifracUnweightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
737         try {
738         vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
739         
740         Unweighted unweighted(includeRoot);
741         
742         //get unweighted scores for random trees - if random is false iters = 0
743         for (int j = 0; j < iters; j++) {
744             
745             //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison
746             randomData = unweighted.getValues(thisTree, "", "", processors, outputDir);
747             
748             if (m->control_pressed) { return 0; }
749                         
750             for(int k = 0; k < numComp; k++) {  
751                 //add trees unweighted score to map of scores
752                 map<float,float>::iterator it = rscoreFreq[k].find(randomData[k]);
753                 if (it != rscoreFreq[k].end()) {//already have that score
754                     rscoreFreq[k][randomData[k]]++;
755                 }else{//first time we have seen this score
756                     rscoreFreq[k][randomData[k]] = 1;
757                 }
758                                 
759                 //add randoms score to validscores
760                 validScores[randomData[k]] = randomData[k];
761             }
762         }
763         
764         for(int a = 0; a < numComp; a++) {
765             float rcumul = 1.0000;
766     
767             //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
768             for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
769                 //make rscoreFreq map and rCumul
770                 map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
771                 rCumul[a][it->first] = rcumul;
772                 //get percentage of random trees with that info
773                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
774                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
775             }
776             UWScoreSig[a].push_back(rCumul[a][usersScores[a]]);
777         }
778         
779         return 0;
780         }
781         catch(exception& e) {
782                 m->errorOut(e, "UnifracUnweightedCommand", "runRandomCalcs");
783                 exit(1);
784         }
785 }
786 /***********************************************************/
787 void UnifracUnweightedCommand::printUnweightedFile() {
788         try {
789                 vector<double> data;
790                 vector<string> tags;
791                 
792                 tags.push_back("Score");
793                 tags.push_back("RandFreq"); tags.push_back("RandCumul");
794                         
795                 for(int a = 0; a < numComp; a++) {
796                         output->initFile(groupComb[a], tags);
797                         //print each line
798                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
799                                 data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);                                             
800                                 output->output(data);
801                                 data.clear();
802                         } 
803                         output->resetFile();
804                 }
805         }
806         catch(exception& e) {
807                 m->errorOut(e, "UnifracUnweightedCommand", "printUnweightedFile");
808                 exit(1);
809         }
810 }
811
812 /***********************************************************/
813 void UnifracUnweightedCommand::printUWSummaryFile(int i) {
814         try {
815                                 
816                 //format output
817                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
818                         
819                 //print each line
820
821                 for(int a = 0; a < numComp; a++) {
822                         outSum << i+1 << '\t';
823                         m->mothurOut(toString(i+1) + "\t");
824                         
825                         if (random) {
826                                 if (UWScoreSig[a][0] > (1/(float)iters)) {
827                                         outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
828                                         cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl; 
829                                         m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0])  + "\t" + toString(UWScoreSig[a][0])+ "\n"); 
830                                 }else {
831                                         outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
832                                         cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
833                                         m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0])  + "\t<" + toString((1/float(iters))) + "\n"); 
834                                 }
835                         }else{
836                                 outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0]  << endl;
837                                 cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0]  << endl; 
838                                 m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0]) + "\n");
839                         }
840                 }
841                 
842         }
843         catch(exception& e) {
844                 m->errorOut(e, "UnifracUnweightedCommand", "printUWSummaryFile");
845                 exit(1);
846         }
847 }
848 /***********************************************************/
849 void UnifracUnweightedCommand::createPhylipFile(int i) {
850         try {
851                 string phylipFileName;
852         map<string, string> variables; 
853                 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
854         variables["[tag]"] = toString(i+1);
855                 if ((outputForm == "lt") || (outputForm == "square")) {
856             variables["[tag2]"] = "unweighted.phylip";
857                         phylipFileName = getOutputFileName("phylip",variables);
858                         outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
859                 }else { //column
860             variables["[tag2]"] = "unweighted.column";
861                         phylipFileName = getOutputFileName("column",variables);
862                         outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
863                 }
864                 
865                 ofstream out;
866                 m->openOutputFile(phylipFileName, out);
867                 
868                 if ((outputForm == "lt") || (outputForm == "square")) {
869                         //output numSeqs
870                         out << m->getNumGroups() << endl;
871                 }
872                 
873                 //make matrix with scores in it
874                 vector< vector<float> > dists;  dists.resize(m->getNumGroups());
875                 for (int i = 0; i < m->getNumGroups(); i++) {
876                         dists[i].resize(m->getNumGroups(), 0.0);
877                 }
878                 
879                 //flip it so you can print it
880                 int count = 0;
881                 for (int r=0; r<m->getNumGroups(); r++) { 
882                         for (int l = 0; l < r; l++) {
883                                 dists[r][l] = utreeScores[count][0];
884                                 dists[l][r] = utreeScores[count][0];
885                                 count++;
886                         }
887                 }
888                 
889                 //output to file
890                 for (int r=0; r<m->getNumGroups(); r++) { 
891                         //output name
892                         string name = (m->getGroups())[r];
893                         if (name.length() < 10) { //pad with spaces to make compatible
894                                 while (name.length() < 10) {  name += " ";  }
895                         }
896                         
897                         if (outputForm == "lt") {
898                                 out << name << '\t';
899                         
900                                 //output distances
901                                 for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
902                                 out << endl;
903                         }else if (outputForm == "square") {
904                                 out << name << '\t';
905                                 
906                                 //output distances
907                                 for (int l = 0; l < m->getNumGroups(); l++) {   out << dists[r][l] << '\t';  }
908                                 out << endl;
909                         }else{
910                                 //output distances
911                                 for (int l = 0; l < r; l++) {   
912                                         string otherName = (m->getGroups())[l];
913                                         if (otherName.length() < 10) { //pad with spaces to make compatible
914                                                 while (otherName.length() < 10) {  otherName += " ";  }
915                                         }
916                                         
917                                         out  << name << '\t' << otherName << dists[r][l] << endl;  
918                                 }
919                         }
920                 }
921                 out.close();
922         }
923         catch(exception& e) {
924                 m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile");
925                 exit(1);
926         }
927 }
928 /***********************************************************/
929
930
931
932