]> git.donarmstrong.com Git - mothur.git/blob - unifracunweightedcommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / unifracunweightedcommand.cpp
1 /*
2  *  unifracunweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracunweightedcommand.h"
11 #include "treereader.h"
12 #include "subsample.h"
13 #include "consensus.h"
14
15 //**********************************************************************************************************************
16 vector<string> UnifracUnweightedCommand::setParameters(){       
17         try {
18                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24                 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
25                 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
26         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
27         CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
28         CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
29                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
31                 
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "UnifracUnweightedCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string UnifracUnweightedCommand::getHelpString(){       
43         try {
44                 string helpString = "";
45                 helpString += "The unifrac.unweighted command parameters are tree, group, name, groups, iters, distance, processors, root and random.  tree parameter is required unless you have valid current tree file.\n";
46                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n";
47                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
48                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n";
49                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n";
50                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
51                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52                 helpString += "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n";
53         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
54         helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results of the subsampling, as well as a consensus tree built from these trees. Default=F.\n";
55                 helpString += "Example unifrac.unweighted(groups=A-B-C, iters=500).\n";
56                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
57                 helpString += "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual.\n";
58                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "UnifracUnweightedCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 UnifracUnweightedCommand::UnifracUnweightedCommand(){   
68         try {
69                 abort = true; calledHelp = true; 
70                 setParameters();
71                 vector<string> tempOutNames;
72                 outputTypes["unweighted"] = tempOutNames;
73                 outputTypes["uwsummary"] = tempOutNames;
74                 outputTypes["phylip"] = tempOutNames;
75                 outputTypes["column"] = tempOutNames;
76         outputTypes["tree"] = tempOutNames;
77         }
78         catch(exception& e) {
79                 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
80                 exit(1);
81         }
82 }
83 /***********************************************************/
84 UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
85         try {
86                 abort = false; calledHelp = false;   
87                 
88                         
89                 //allow user to run help
90                 if(option == "help") { help(); abort = true; calledHelp = true; }
91                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
92                 
93                 else {
94                         vector<string> myArray = setParameters();
95                         
96                         OptionParser parser(option);
97                         map<string,string> parameters = parser.getParameters();
98                         map<string,string>::iterator it;
99                         
100                         ValidParameters validParameter;
101                 
102                         //check to make sure all parameters are valid for command
103                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
104                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
105                         }
106                         
107                         //initialize outputTypes
108                         vector<string> tempOutNames;
109                         outputTypes["unweighted"] = tempOutNames;
110                         outputTypes["uwsummary"] = tempOutNames;
111                         outputTypes["phylip"] = tempOutNames;
112                         outputTypes["column"] = tempOutNames;
113             outputTypes["tree"] = tempOutNames;
114                         
115                         //if the user changes the input directory command factory will send this info to us in the output parameter 
116                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
117                         if (inputDir == "not found"){   inputDir = "";          }
118                         else {
119                                 string path;
120                                 it = parameters.find("tree");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
126                                 }
127                                 
128                                 it = parameters.find("group");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
134                                 }
135                                 
136                                 it = parameters.find("name");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
142                                 }
143                         }
144                         
145             //check for required parameters
146                         treefile = validParameter.validFile(parameters, "tree", true);
147                         if (treefile == "not open") { abort = true; }
148                         else if (treefile == "not found") {                             //if there is a current design file, use it
149                                 treefile = m->getTreeFile(); 
150                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
151                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
152                         }else { m->setTreeFile(treefile); }     
153                         
154                         //check for required parameters
155                         groupfile = validParameter.validFile(parameters, "group", true);
156                         if (groupfile == "not open") { abort = true; }
157                         else if (groupfile == "not found") { groupfile = ""; }
158                         else { m->setGroupFile(groupfile); }
159                         
160                         namefile = validParameter.validFile(parameters, "name", true);
161                         if (namefile == "not open") { namefile = ""; abort = true; }
162                         else if (namefile == "not found") { namefile = ""; }
163                         else { m->setNameFile(namefile); }
164                         
165                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
166                         
167                         //check for optional parameter and set defaults
168                         // ...at some point should added some additional type checking...
169                         groups = validParameter.validFile(parameters, "groups", false);                 
170                         if (groups == "not found") { groups = ""; }
171                         else { 
172                                 m->splitAtDash(groups, Groups);
173                                 m->setGroups(Groups);
174                         }
175                                 
176                         itersString = validParameter.validFile(parameters, "iters", false);                             if (itersString == "not found") { itersString = "1000"; }
177                         m->mothurConvert(itersString, iters); 
178                         
179                         string temp = validParameter.validFile(parameters, "distance", false);                  
180                         if (temp == "not found") { phylip = false; outputForm = ""; }
181                         else{
182                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
183                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
184                         }
185                         
186                         temp = validParameter.validFile(parameters, "random", false);                                   if (temp == "not found") { temp = "f"; }
187                         random = m->isTrue(temp);
188                         
189                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
190                         includeRoot = m->isTrue(temp);
191                         
192                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
193                         m->setProcessors(temp);
194                         m->mothurConvert(temp, processors); 
195                         
196             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
197                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
198             else {  
199                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
200                 else { subsample = false; }
201             }
202                         
203             if (!subsample) { subsampleIters = 0;   }
204             else { subsampleIters = iters;          }
205             
206             temp = validParameter.validFile(parameters, "consensus", false);                                    if (temp == "not found") { temp = "F"; }
207                         consensus = m->isTrue(temp);
208             
209                         if (subsample && random) {  m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true;  } 
210                         if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } 
211             if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
212             if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
213
214                         if (!random) {  iters = 0;  } //turn off random calcs
215                         
216                         //if user selects distance = true and no groups it won't calc the pairwise
217                         if ((phylip) && (Groups.size() == 0)) {
218                                 groups = "all";
219                                 m->splitAtDash(groups, Groups);
220                                 m->setGroups(Groups);
221                         }
222                         
223                         if (namefile == "") {
224                                 vector<string> files; files.push_back(treefile);
225                                 parser.getNameFile(files);
226                         }
227                 }
228                 
229         }
230         catch(exception& e) {
231                 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
232                 exit(1);
233         }
234 }
235
236 /***********************************************************/
237 int UnifracUnweightedCommand::execute() {
238         try {
239                 
240                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
241                 
242                 m->setTreeFile(treefile);
243                 
244                 TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
245         T = reader->getTrees();
246         tmap = T[0]->getTreeMap();
247         map<string, string> nameMap = reader->getNames();
248         delete reader;  
249         
250                 sumFile = outputDir + m->getSimpleName(treefile) + ".uwsummary";
251                 outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
252                 m->openOutputFile(sumFile, outSum);
253                 
254                 SharedUtil util;
255                 Groups = m->getGroups();
256                 vector<string> namesGroups = tmap->getNamesOfGroups();
257                 util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted");        //sets the groups the user wants to analyze
258                 
259                 Unweighted unweighted(includeRoot);
260                 
261                 int start = time(NULL);
262         
263         //set or check size
264         if (subsample) {
265             //user has not set size, set size = smallest samples size
266             if (subsampleSize == -1) { 
267                 vector<string> temp; temp.push_back(Groups[0]);
268                 subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
269                 for (int i = 1; i < Groups.size(); i++) {
270                     temp.clear(); temp.push_back(Groups[i]);
271                     int thisSize = (tmap->getNamesSeqs(temp)).size();
272                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
273                 }
274                 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
275             }else { //eliminate any too small groups
276                 vector<string> newGroups = Groups;
277                 Groups.clear();
278                 for (int i = 0; i < newGroups.size(); i++) {
279                     vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
280                     vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
281                     int thisSize = thisGroupsSeqs.size();
282                     
283                     if (thisSize >= subsampleSize) {    Groups.push_back(newGroups[i]); }
284                     else {  m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
285                 } 
286                 m->setGroups(Groups);
287             }
288         }
289                 
290         util.getCombos(groupComb, Groups, numComp);
291                 m->setGroups(Groups);
292         
293                 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
294         
295                 if (numComp < processors) { processors = numComp;  }
296         
297         if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
298                 
299                 outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "UWScore" <<'\t';
300                 m->mothurOut("Tree#\tGroups\tUWScore\t");
301                 if (random) { outSum << "UWSig"; m->mothurOut("UWSig"); }
302                 outSum << endl; m->mothurOutEndLine();
303          
304                 //get pscores for users trees
305                 for (int i = 0; i < T.size(); i++) {
306                         if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
307                         
308                         counter = 0;
309                         
310                         if (random)  {  
311                                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted", itersString);
312                                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted");
313                                 outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted");
314                         }
315                         
316                         
317                         //get unweighted for users tree
318                         rscoreFreq.resize(numComp);  
319                         rCumul.resize(numComp);  
320                         utreeScores.resize(numComp);  
321                         UWScoreSig.resize(numComp); 
322             
323             vector<double> userData; userData.resize(numComp,0);  //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
324
325                         userData = unweighted.getValues(T[i], processors, outputDir);  //userData[0] = unweightedscore
326                 
327                         if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close();  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  }return 0; }
328                         
329                         //output scores for each combination
330                         for(int k = 0; k < numComp; k++) {
331                                 //saves users score
332                                 utreeScores[k].push_back(userData[k]);
333                                 
334                                 //add users score to validscores
335                                 validScores[userData[k]] = userData[k];
336                 
337                 if (!random) { UWScoreSig[k].push_back(0.0);    }
338                         }
339             
340             if (random) {  runRandomCalcs(T[i], userData);  }
341                         
342                         if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0;  }
343             
344             //subsample loop
345             vector< vector<double> > calcDistsTotals;  //each iter, each groupCombos dists. this will be used to make .dist files
346             for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
347                 
348                 if (m->control_pressed) { break; }
349                 
350                 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
351                 TreeMap* newTmap = new TreeMap();
352                 newTmap->getCopy(*tmap);
353                 
354                 SubSample sample;
355                 Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
356                 
357                 //call new weighted function
358                 vector<double> iterData; iterData.resize(numComp,0);
359                 Unweighted thisUnweighted(includeRoot);
360                 iterData = thisUnweighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
361                 
362                 //save data to make ave dist, std dist
363                 calcDistsTotals.push_back(iterData);
364                 
365                 delete newTmap;
366                 delete subSampleTree;
367                 
368                 if((thisIter+1) % 100 == 0){    m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine();              }
369             }
370             
371             if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]);  } return 0;  }
372
373             if (subsample) {  getAverageSTDMatrices(calcDistsTotals, i); }
374             if (consensus) {  getConsensusTrees(calcDistsTotals, i);  }
375             
376             //print output files
377                         printUWSummaryFile(i);
378                         if (random)  {  printUnweightedFile();  delete output;  }
379                         if (phylip) {   createPhylipFile(i);            }
380                         
381                         rscoreFreq.clear(); 
382                         rCumul.clear();  
383                         validScores.clear(); 
384                         utreeScores.clear();  
385                         UWScoreSig.clear(); 
386                 }
387                 
388
389                 outSum.close();
390                 delete tmap; 
391                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
392                 
393                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }     return 0; }
394                 
395                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
396                 
397                 //set phylip file as new current phylipfile
398                 string current = "";
399                 itTypes = outputTypes.find("phylip");
400                 if (itTypes != outputTypes.end()) {
401                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
402                 }
403                 
404                 //set column file as new current columnfile
405                 itTypes = outputTypes.find("column");
406                 if (itTypes != outputTypes.end()) {
407                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
408                 }
409                 
410                 m->mothurOutEndLine();
411                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
412                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
413                 m->mothurOutEndLine();
414                 
415                 return 0;
416                 
417         }
418         catch(exception& e) {
419                 m->errorOut(e, "UnifracUnweightedCommand", "execute");
420                 exit(1);
421         }
422 }
423 /**************************************************************************************************/
424 int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
425         try {
426         //we need to find the average distance and standard deviation for each groups distance
427         
428         //finds sum
429         vector<double> averages; averages.resize(numComp, 0); 
430         for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
431             for (int i = 0; i < dists[thisIter].size(); i++) {  
432                 averages[i] += dists[thisIter][i];
433             }
434         }
435         
436         //finds average.
437         for (int i = 0; i < averages.size(); i++) {  averages[i] /= (float) subsampleIters; }
438         
439         //find standard deviation
440         vector<double> stdDev; stdDev.resize(numComp, 0);
441         
442         for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
443             for (int j = 0; j < dists[thisIter].size(); j++) {
444                 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
445             }
446         }
447         for (int i = 0; i < stdDev.size(); i++) {  
448             stdDev[i] /= (float) subsampleIters; 
449             stdDev[i] = sqrt(stdDev[i]);
450         }
451         
452         //make matrix with scores in it
453         vector< vector<double> > avedists;      avedists.resize(m->getNumGroups());
454         for (int i = 0; i < m->getNumGroups(); i++) {
455             avedists[i].resize(m->getNumGroups(), 0.0);
456         }
457         
458         //make matrix with scores in it
459         vector< vector<double> > stddists;      stddists.resize(m->getNumGroups());
460         for (int i = 0; i < m->getNumGroups(); i++) {
461             stddists[i].resize(m->getNumGroups(), 0.0);
462         }
463         
464         //flip it so you can print it
465         int count = 0;
466         for (int r=0; r<m->getNumGroups(); r++) { 
467             for (int l = 0; l < r; l++) {
468                 avedists[r][l] = averages[count];
469                 avedists[l][r] = averages[count];
470                 stddists[r][l] = stdDev[count];
471                 stddists[l][r] = stdDev[count];
472                 count++;
473             }
474         }
475         
476         string aveFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".unweighted.ave.dist";
477         outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); 
478         
479         ofstream out;
480         m->openOutputFile(aveFileName, out);
481         
482         string stdFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".unweighted.std.dist";
483         outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); 
484         
485         ofstream outStd;
486         m->openOutputFile(stdFileName, outStd);
487         
488         if ((outputForm == "lt") || (outputForm == "square")) {
489             //output numSeqs
490             out << m->getNumGroups() << endl;
491             outStd << m->getNumGroups() << endl;
492         }
493         
494         //output to file
495         for (int r=0; r<m->getNumGroups(); r++) { 
496             //output name
497             string name = (m->getGroups())[r];
498             if (name.length() < 10) { //pad with spaces to make compatible
499                 while (name.length() < 10) {  name += " ";  }
500             }
501             
502             if (outputForm == "lt") {
503                 out << name << '\t';
504                 outStd << name << '\t';
505                 
506                 //output distances
507                 for (int l = 0; l < r; l++) {   out  << avedists[r][l] << '\t';  outStd  << stddists[r][l] << '\t';}
508                 out << endl;  outStd << endl;
509             }else if (outputForm == "square") {
510                 out << name << '\t';
511                 outStd << name << '\t';
512                 
513                 //output distances
514                 for (int l = 0; l < m->getNumGroups(); l++) {   out  << avedists[r][l] << '\t'; outStd  << stddists[r][l] << '\t'; }
515                 out << endl; outStd << endl;
516             }else{
517                 //output distances
518                 for (int l = 0; l < r; l++) {   
519                     string otherName = (m->getGroups())[l];
520                     if (otherName.length() < 10) { //pad with spaces to make compatible
521                         while (otherName.length() < 10) {  otherName += " ";  }
522                     }
523                     
524                     out  << name << '\t' << otherName << avedists[r][l] << endl;  
525                     outStd  << name << '\t' << otherName << stddists[r][l] << endl; 
526                 }
527             }
528         }
529         out.close();
530         outStd.close();
531         
532         return 0;
533     }
534         catch(exception& e) {
535                 m->errorOut(e, "UnifracUnweightedCommand", "getAverageSTDMatrices");
536                 exit(1);
537         }
538 }
539
540 /**************************************************************************************************/
541 int UnifracUnweightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
542         try {
543         
544         //used in tree constructor 
545         m->runParse = false;
546         
547         //create treemap class from groupmap for tree class to use
548         TreeMap newTmap;
549         newTmap.makeSim(m->getGroups());
550         
551         //clear  old tree names if any
552         m->Treenames.clear();
553         
554         //fills globaldatas tree names
555         m->Treenames = m->getGroups();
556         
557         vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
558         
559         if (m->control_pressed) { return 0; }
560         
561         Consensus con;
562         Tree* conTree = con.getTree(newTrees);
563         
564         //create a new filename
565         string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.cons.tre";                         
566         outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
567         ofstream outTree;
568         m->openOutputFile(conFile, outTree);
569         
570         if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
571         outTree.close();
572         
573         return 0;
574         
575     }
576         catch(exception& e) {
577                 m->errorOut(e, "UnifracUnweightedCommand", "getConsensusTrees");
578                 exit(1);
579         }
580 }
581 /**************************************************************************************************/
582
583 vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
584         try {
585         
586         vector<Tree*> trees;
587         
588         //create a new filename
589         string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.all.tre";                               
590         outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
591         
592         ofstream outAll;
593         m->openOutputFile(outputFile, outAll);
594         
595         
596         for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
597             
598             if (m->control_pressed) { break; }
599             
600             //make matrix with scores in it
601             vector< vector<double> > sims;      sims.resize(m->getNumGroups());
602             for (int j = 0; j < m->getNumGroups(); j++) {
603                 sims[j].resize(m->getNumGroups(), 0.0);
604             }
605             
606             int count = 0;
607                         for (int r=0; r<m->getNumGroups(); r++) { 
608                                 for (int l = 0; l < r; l++) {
609                     double sim = -(dists[i][count]-1.0);
610                                         sims[r][l] = sim;
611                                         sims[l][r] = sim;
612                                         count++;
613                                 }
614                         }
615             
616             //create tree
617             Tree* tempTree = new Tree(&mytmap, sims);
618             map<string, string> empty;
619             tempTree->assembleTree(empty);
620             
621             trees.push_back(tempTree);
622             
623             //print tree
624             tempTree->print(outAll);
625         }
626         
627         outAll.close();
628         
629         if (m->control_pressed) {  for (int i = 0; i < trees.size(); i++) {  delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
630         
631         return trees;
632     }
633         catch(exception& e) {
634                 m->errorOut(e, "UnifracUnweightedCommand", "buildTrees");
635                 exit(1);
636         }
637 }
638 /**************************************************************************************************/
639
640 int UnifracUnweightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
641         try {
642         vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
643         
644         Unweighted unweighted(includeRoot);
645         
646         //get unweighted scores for random trees - if random is false iters = 0
647         for (int j = 0; j < iters; j++) {
648             
649             //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison
650             randomData = unweighted.getValues(thisTree, "", "", processors, outputDir);
651             
652             if (m->control_pressed) { return 0; }
653                         
654             for(int k = 0; k < numComp; k++) {  
655                 //add trees unweighted score to map of scores
656                 map<float,float>::iterator it = rscoreFreq[k].find(randomData[k]);
657                 if (it != rscoreFreq[k].end()) {//already have that score
658                     rscoreFreq[k][randomData[k]]++;
659                 }else{//first time we have seen this score
660                     rscoreFreq[k][randomData[k]] = 1;
661                 }
662                                 
663                 //add randoms score to validscores
664                 validScores[randomData[k]] = randomData[k];
665             }
666         }
667         
668         for(int a = 0; a < numComp; a++) {
669             float rcumul = 1.0000;
670     
671             //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
672             for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
673                 //make rscoreFreq map and rCumul
674                 map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
675                 rCumul[a][it->first] = rcumul;
676                 //get percentage of random trees with that info
677                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
678                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
679             }
680             UWScoreSig[a].push_back(rCumul[a][usersScores[a]]);
681         }
682         
683         return 0;
684         }
685         catch(exception& e) {
686                 m->errorOut(e, "UnifracUnweightedCommand", "runRandomCalcs");
687                 exit(1);
688         }
689 }
690 /***********************************************************/
691 void UnifracUnweightedCommand::printUnweightedFile() {
692         try {
693                 vector<double> data;
694                 vector<string> tags;
695                 
696                 tags.push_back("Score");
697                 tags.push_back("RandFreq"); tags.push_back("RandCumul");
698                         
699                 for(int a = 0; a < numComp; a++) {
700                         output->initFile(groupComb[a], tags);
701                         //print each line
702                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
703                                 data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);                                             
704                                 output->output(data);
705                                 data.clear();
706                         } 
707                         output->resetFile();
708                 }
709         }
710         catch(exception& e) {
711                 m->errorOut(e, "UnifracUnweightedCommand", "printUnweightedFile");
712                 exit(1);
713         }
714 }
715
716 /***********************************************************/
717 void UnifracUnweightedCommand::printUWSummaryFile(int i) {
718         try {
719                                 
720                 //format output
721                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
722                         
723                 //print each line
724
725                 for(int a = 0; a < numComp; a++) {
726                         outSum << i+1 << '\t';
727                         m->mothurOut(toString(i+1) + "\t");
728                         
729                         if (random) {
730                                 if (UWScoreSig[a][0] > (1/(float)iters)) {
731                                         outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
732                                         cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl; 
733                                         m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0])  + "\t" + toString(UWScoreSig[a][0])+ "\n"); 
734                                 }else {
735                                         outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
736                                         cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
737                                         m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0])  + "\t<" + toString((1/float(iters))) + "\n"); 
738                                 }
739                         }else{
740                                 outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0]  << endl;
741                                 cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0]  << endl; 
742                                 m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0]) + "\n");
743                         }
744                 }
745                 
746         }
747         catch(exception& e) {
748                 m->errorOut(e, "UnifracUnweightedCommand", "printUWSummaryFile");
749                 exit(1);
750         }
751 }
752 /***********************************************************/
753 void UnifracUnweightedCommand::createPhylipFile(int i) {
754         try {
755                 string phylipFileName;
756                 if ((outputForm == "lt") || (outputForm == "square")) {
757                         phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted.phylip.dist";
758                         outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
759                 }else { //column
760                         phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted.column.dist";
761                         outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
762                 }
763                 
764                 ofstream out;
765                 m->openOutputFile(phylipFileName, out);
766                 
767                 if ((outputForm == "lt") || (outputForm == "square")) {
768                         //output numSeqs
769                         out << m->getNumGroups() << endl;
770                 }
771                 
772                 //make matrix with scores in it
773                 vector< vector<float> > dists;  dists.resize(m->getNumGroups());
774                 for (int i = 0; i < m->getNumGroups(); i++) {
775                         dists[i].resize(m->getNumGroups(), 0.0);
776                 }
777                 
778                 //flip it so you can print it
779                 int count = 0;
780                 for (int r=0; r<m->getNumGroups(); r++) { 
781                         for (int l = 0; l < r; l++) {
782                                 dists[r][l] = utreeScores[count][0];
783                                 dists[l][r] = utreeScores[count][0];
784                                 count++;
785                         }
786                 }
787                 
788                 //output to file
789                 for (int r=0; r<m->getNumGroups(); r++) { 
790                         //output name
791                         string name = (m->getGroups())[r];
792                         if (name.length() < 10) { //pad with spaces to make compatible
793                                 while (name.length() < 10) {  name += " ";  }
794                         }
795                         
796                         if (outputForm == "lt") {
797                                 out << name << '\t';
798                         
799                                 //output distances
800                                 for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
801                                 out << endl;
802                         }else if (outputForm == "square") {
803                                 out << name << '\t';
804                                 
805                                 //output distances
806                                 for (int l = 0; l < m->getNumGroups(); l++) {   out << dists[r][l] << '\t';  }
807                                 out << endl;
808                         }else{
809                                 //output distances
810                                 for (int l = 0; l < r; l++) {   
811                                         string otherName = (m->getGroups())[l];
812                                         if (otherName.length() < 10) { //pad with spaces to make compatible
813                                                 while (otherName.length() < 10) {  otherName += " ";  }
814                                         }
815                                         
816                                         out  << name << '\t' << otherName << dists[r][l] << endl;  
817                                 }
818                         }
819                 }
820                 out.close();
821         }
822         catch(exception& e) {
823                 m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile");
824                 exit(1);
825         }
826 }
827 /***********************************************************/
828
829
830
831