]> git.donarmstrong.com Git - mothur.git/blob - unifracunweightedcommand.cpp
added load.logfile command. changed summary.single output for subsample=t.
[mothur.git] / unifracunweightedcommand.cpp
1 /*
2  *  unifracunweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracunweightedcommand.h"
11 #include "treereader.h"
12 #include "subsample.h"
13 #include "consensus.h"
14
15 //**********************************************************************************************************************
16 vector<string> UnifracUnweightedCommand::setParameters(){       
17         try {
18                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24                 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
25                 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
26         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
27         CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
28         CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
29                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
31                 
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "UnifracUnweightedCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string UnifracUnweightedCommand::getHelpString(){       
43         try {
44                 string helpString = "";
45                 helpString += "The unifrac.unweighted command parameters are tree, group, name, groups, iters, distance, processors, root and random.  tree parameter is required unless you have valid current tree file.\n";
46                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n";
47                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
48                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n";
49                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n";
50                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
51                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52                 helpString += "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n";
53         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
54         helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results of the subsampling, as well as a consensus tree built from these trees. Default=F.\n";
55                 helpString += "Example unifrac.unweighted(groups=A-B-C, iters=500).\n";
56                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
57                 helpString += "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual.\n";
58                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "UnifracUnweightedCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 string UnifracUnweightedCommand::getOutputFileNameTag(string type, string inputName=""){        
68         try {
69         string outputFileName = "";
70                 map<string, vector<string> >::iterator it;
71         
72         //is this a type this command creates
73         it = outputTypes.find(type);
74         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
75         else {
76             if (type == "unweighted")            {   outputFileName =  "unweighted";   }
77             else if (type == "uwsummary")        {   outputFileName =  "uwsummary";   }
78             else if (type == "phylip")           {   outputFileName =  "dist";   }
79             else if (type == "column")           {   outputFileName =  "dist";   }
80             else if (type == "tree")             {   outputFileName =  "tre";   }
81             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
82         }
83         return outputFileName;
84         }
85         catch(exception& e) {
86                 m->errorOut(e, "UnifracUnweightedCommand", "getOutputFileNameTag");
87                 exit(1);
88         }
89 }
90
91 //**********************************************************************************************************************
92 UnifracUnweightedCommand::UnifracUnweightedCommand(){   
93         try {
94                 abort = true; calledHelp = true; 
95                 setParameters();
96                 vector<string> tempOutNames;
97                 outputTypes["unweighted"] = tempOutNames;
98                 outputTypes["uwsummary"] = tempOutNames;
99                 outputTypes["phylip"] = tempOutNames;
100                 outputTypes["column"] = tempOutNames;
101         outputTypes["tree"] = tempOutNames;
102         }
103         catch(exception& e) {
104                 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
105                 exit(1);
106         }
107 }
108 /***********************************************************/
109 UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
110         try {
111                 abort = false; calledHelp = false;   
112                 
113                         
114                 //allow user to run help
115                 if(option == "help") { help(); abort = true; calledHelp = true; }
116                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
117                 
118                 else {
119                         vector<string> myArray = setParameters();
120                         
121                         OptionParser parser(option);
122                         map<string,string> parameters = parser.getParameters();
123                         map<string,string>::iterator it;
124                         
125                         ValidParameters validParameter;
126                 
127                         //check to make sure all parameters are valid for command
128                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
129                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
130                         }
131                         
132                         //initialize outputTypes
133                         vector<string> tempOutNames;
134                         outputTypes["unweighted"] = tempOutNames;
135                         outputTypes["uwsummary"] = tempOutNames;
136                         outputTypes["phylip"] = tempOutNames;
137                         outputTypes["column"] = tempOutNames;
138             outputTypes["tree"] = tempOutNames;
139                         
140                         //if the user changes the input directory command factory will send this info to us in the output parameter 
141                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
142                         if (inputDir == "not found"){   inputDir = "";          }
143                         else {
144                                 string path;
145                                 it = parameters.find("tree");
146                                 //user has given a template file
147                                 if(it != parameters.end()){ 
148                                         path = m->hasPath(it->second);
149                                         //if the user has not given a path then, add inputdir. else leave path alone.
150                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
151                                 }
152                                 
153                                 it = parameters.find("group");
154                                 //user has given a template file
155                                 if(it != parameters.end()){ 
156                                         path = m->hasPath(it->second);
157                                         //if the user has not given a path then, add inputdir. else leave path alone.
158                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
159                                 }
160                                 
161                                 it = parameters.find("name");
162                                 //user has given a template file
163                                 if(it != parameters.end()){ 
164                                         path = m->hasPath(it->second);
165                                         //if the user has not given a path then, add inputdir. else leave path alone.
166                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
167                                 }
168                         }
169                         
170             //check for required parameters
171                         treefile = validParameter.validFile(parameters, "tree", true);
172                         if (treefile == "not open") { abort = true; }
173                         else if (treefile == "not found") {                             //if there is a current design file, use it
174                                 treefile = m->getTreeFile(); 
175                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
176                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
177                         }else { m->setTreeFile(treefile); }     
178                         
179                         //check for required parameters
180                         groupfile = validParameter.validFile(parameters, "group", true);
181                         if (groupfile == "not open") { abort = true; }
182                         else if (groupfile == "not found") { groupfile = ""; }
183                         else { m->setGroupFile(groupfile); }
184                         
185                         namefile = validParameter.validFile(parameters, "name", true);
186                         if (namefile == "not open") { namefile = ""; abort = true; }
187                         else if (namefile == "not found") { namefile = ""; }
188                         else { m->setNameFile(namefile); }
189                         
190                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
191                         
192                         //check for optional parameter and set defaults
193                         // ...at some point should added some additional type checking...
194                         groups = validParameter.validFile(parameters, "groups", false);                 
195                         if (groups == "not found") { groups = ""; }
196                         else { 
197                                 m->splitAtDash(groups, Groups);
198                                 m->setGroups(Groups);
199                         }
200                                 
201                         itersString = validParameter.validFile(parameters, "iters", false);                             if (itersString == "not found") { itersString = "1000"; }
202                         m->mothurConvert(itersString, iters); 
203                         
204                         string temp = validParameter.validFile(parameters, "distance", false);                  
205                         if (temp == "not found") { phylip = false; outputForm = ""; }
206                         else{
207                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
208                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
209                         }
210                         
211                         temp = validParameter.validFile(parameters, "random", false);                                   if (temp == "not found") { temp = "f"; }
212                         random = m->isTrue(temp);
213                         
214                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
215                         includeRoot = m->isTrue(temp);
216                         
217                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
218                         m->setProcessors(temp);
219                         m->mothurConvert(temp, processors); 
220                         
221             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
222                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
223             else {  
224                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
225                 else { subsample = false; }
226             }
227                         
228             if (!subsample) { subsampleIters = 0;   }
229             else { subsampleIters = iters;          }
230             
231             temp = validParameter.validFile(parameters, "consensus", false);                                    if (temp == "not found") { temp = "F"; }
232                         consensus = m->isTrue(temp);
233             
234                         if (subsample && random) {  m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true;  } 
235                         if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } 
236             if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
237             if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
238
239                         if (!random) {  iters = 0;  } //turn off random calcs
240                         
241                         //if user selects distance = true and no groups it won't calc the pairwise
242                         if ((phylip) && (Groups.size() == 0)) {
243                                 groups = "all";
244                                 m->splitAtDash(groups, Groups);
245                                 m->setGroups(Groups);
246                         }
247                         
248                         if (namefile == "") {
249                                 vector<string> files; files.push_back(treefile);
250                                 parser.getNameFile(files);
251                         }
252                 }
253                 
254         }
255         catch(exception& e) {
256                 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
257                 exit(1);
258         }
259 }
260
261 /***********************************************************/
262 int UnifracUnweightedCommand::execute() {
263         try {
264                 
265                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
266                 
267                 m->setTreeFile(treefile);
268                 
269                 TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
270         T = reader->getTrees();
271         tmap = T[0]->getTreeMap();
272         map<string, string> nameMap = reader->getNames();
273         map<string, string> unique2Dup = reader->getNameMap();
274         delete reader;  
275         
276                 sumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("uwsummary");
277                 outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
278                 m->openOutputFile(sumFile, outSum);
279                 
280                 SharedUtil util;
281                 Groups = m->getGroups();
282                 vector<string> namesGroups = tmap->getNamesOfGroups();
283                 util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted");        //sets the groups the user wants to analyze
284                 
285                 Unweighted unweighted(includeRoot);
286                 
287                 int start = time(NULL);
288         
289         //set or check size
290         if (subsample) {
291             //user has not set size, set size = smallest samples size
292             if (subsampleSize == -1) { 
293                 vector<string> temp; temp.push_back(Groups[0]);
294                 subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
295                 for (int i = 1; i < Groups.size(); i++) {
296                     temp.clear(); temp.push_back(Groups[i]);
297                     int thisSize = (tmap->getNamesSeqs(temp)).size();
298                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
299                 }
300                 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
301             }else { //eliminate any too small groups
302                 vector<string> newGroups = Groups;
303                 Groups.clear();
304                 for (int i = 0; i < newGroups.size(); i++) {
305                     vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
306                     vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
307                     int thisSize = thisGroupsSeqs.size();
308                     
309                     if (thisSize >= subsampleSize) {    Groups.push_back(newGroups[i]); }
310                     else {   m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
311                 } 
312                 m->setGroups(Groups);
313             }
314         }
315                 
316         util.getCombos(groupComb, Groups, numComp);
317                 m->setGroups(Groups);
318         
319                 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
320         
321                 if (numComp < processors) { processors = numComp;  }
322         
323         if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
324                 
325                 outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "UWScore" <<'\t';
326                 m->mothurOut("Tree#\tGroups\tUWScore\t");
327                 if (random) { outSum << "UWSig"; m->mothurOut("UWSig"); }
328                 outSum << endl; m->mothurOutEndLine();
329          
330                 //get pscores for users trees
331                 for (int i = 0; i < T.size(); i++) {
332                         if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
333                         
334             counter = 0;
335                         
336                         if (random)  {  
337                                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("unweighted"), itersString);
338                                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("unweighted"));
339                                 outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("unweighted"));
340                         }
341                         
342                         
343                         //get unweighted for users tree
344                         rscoreFreq.resize(numComp);  
345                         rCumul.resize(numComp);  
346                         utreeScores.resize(numComp);  
347                         UWScoreSig.resize(numComp); 
348             
349             vector<double> userData; userData.resize(numComp,0);  //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
350
351                         userData = unweighted.getValues(T[i], processors, outputDir);  //userData[0] = unweightedscore
352                 
353                         if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close();  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  }return 0; }
354                         
355                         //output scores for each combination
356                         for(int k = 0; k < numComp; k++) {
357                                 //saves users score
358                                 utreeScores[k].push_back(userData[k]);
359                                 
360                                 //add users score to validscores
361                                 validScores[userData[k]] = userData[k];
362                 
363                 if (!random) { UWScoreSig[k].push_back(0.0);    }
364                         }
365             
366             if (random) {  runRandomCalcs(T[i], userData);  }
367                         
368                         if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0;  }
369             
370             int startSubsample = time(NULL);
371             
372             //subsample loop
373             vector< vector<double> > calcDistsTotals;  //each iter, each groupCombos dists. this will be used to make .dist files
374             for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
375                 if (m->control_pressed) { break; }
376                 
377                 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
378                 TreeMap* newTmap = new TreeMap();
379                 //newTmap->getCopy(*tmap);
380                 
381                 //SubSample sample;
382                 //Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
383                 
384                 //uses method of setting groups to doNotIncludeMe
385                 SubSample sample;
386                 Tree* subSampleTree = sample.getSample(T[i], tmap, newTmap, subsampleSize, unique2Dup);
387                 
388                 //call new weighted function
389                 vector<double> iterData; iterData.resize(numComp,0);
390                 Unweighted thisUnweighted(includeRoot);
391                 iterData = thisUnweighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
392                 
393                 //save data to make ave dist, std dist
394                 calcDistsTotals.push_back(iterData);
395                 
396                 delete newTmap;
397                 delete subSampleTree;
398                 
399                 if((thisIter+1) % 100 == 0){    m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine();              }
400             }
401             m->mothurOut("It took " + toString(time(NULL) - startSubsample) + " secs to run the subsampling."); m->mothurOutEndLine();
402             
403             if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]);  } return 0;  }
404
405             if (subsample) {  getAverageSTDMatrices(calcDistsTotals, i); }
406             if (consensus) {  getConsensusTrees(calcDistsTotals, i);  }
407             
408             //print output files
409                         printUWSummaryFile(i);
410                         if (random)  {  printUnweightedFile();  delete output;  }
411                         if (phylip) {   createPhylipFile(i);            }
412                         
413                         rscoreFreq.clear(); 
414                         rCumul.clear();  
415                         validScores.clear(); 
416                         utreeScores.clear();  
417                         UWScoreSig.clear(); 
418                 }
419                 
420
421                 outSum.close();
422                 delete tmap; 
423                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
424                 
425                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }     return 0; }
426                 
427                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
428                 
429                 //set phylip file as new current phylipfile
430                 string current = "";
431                 itTypes = outputTypes.find("phylip");
432                 if (itTypes != outputTypes.end()) {
433                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
434                 }
435                 
436                 //set column file as new current columnfile
437                 itTypes = outputTypes.find("column");
438                 if (itTypes != outputTypes.end()) {
439                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
440                 }
441                 
442                 m->mothurOutEndLine();
443                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
444                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
445                 m->mothurOutEndLine();
446                 
447                 return 0;
448                 
449         }
450         catch(exception& e) {
451                 m->errorOut(e, "UnifracUnweightedCommand", "execute");
452                 exit(1);
453         }
454 }
455 /**************************************************************************************************/
456 int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
457         try {
458         //we need to find the average distance and standard deviation for each groups distance
459         
460         //finds sum
461         vector<double> averages; averages.resize(numComp, 0); 
462         for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
463             for (int i = 0; i < dists[thisIter].size(); i++) {  
464                 averages[i] += dists[thisIter][i];
465             }
466         }
467         
468         //finds average.
469         for (int i = 0; i < averages.size(); i++) {  averages[i] /= (float) subsampleIters; }
470         
471         //find standard deviation
472         vector<double> stdDev; stdDev.resize(numComp, 0);
473         
474         for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
475             for (int j = 0; j < dists[thisIter].size(); j++) {
476                 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
477             }
478         }
479         for (int i = 0; i < stdDev.size(); i++) {  
480             stdDev[i] /= (float) subsampleIters; 
481             stdDev[i] = sqrt(stdDev[i]);
482         }
483         
484         //make matrix with scores in it
485         vector< vector<double> > avedists;      avedists.resize(m->getNumGroups());
486         for (int i = 0; i < m->getNumGroups(); i++) {
487             avedists[i].resize(m->getNumGroups(), 0.0);
488         }
489         
490         //make matrix with scores in it
491         vector< vector<double> > stddists;      stddists.resize(m->getNumGroups());
492         for (int i = 0; i < m->getNumGroups(); i++) {
493             stddists[i].resize(m->getNumGroups(), 0.0);
494         }
495         
496         //flip it so you can print it
497         int count = 0;
498         for (int r=0; r<m->getNumGroups(); r++) { 
499             for (int l = 0; l < r; l++) {
500                 avedists[r][l] = averages[count];
501                 avedists[l][r] = averages[count];
502                 stddists[r][l] = stdDev[count];
503                 stddists[l][r] = stdDev[count];
504                 count++;
505             }
506         }
507         
508         string aveFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".unweighted.ave." + getOutputFileNameTag("phylip");
509         if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);  }
510         else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName);  }
511         ofstream out;
512         m->openOutputFile(aveFileName, out);
513         
514         string stdFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".unweighted.std." + getOutputFileNameTag("phylip");
515         if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
516         else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
517         ofstream outStd;
518         m->openOutputFile(stdFileName, outStd);
519         
520         if ((outputForm == "lt") || (outputForm == "square")) {
521             //output numSeqs
522             out << m->getNumGroups() << endl;
523             outStd << m->getNumGroups() << endl;
524         }
525         
526         //output to file
527         for (int r=0; r<m->getNumGroups(); r++) { 
528             //output name
529             string name = (m->getGroups())[r];
530             if (name.length() < 10) { //pad with spaces to make compatible
531                 while (name.length() < 10) {  name += " ";  }
532             }
533             
534             if (outputForm == "lt") {
535                 out << name << '\t';
536                 outStd << name << '\t';
537                 
538                 //output distances
539                 for (int l = 0; l < r; l++) {   out  << avedists[r][l] << '\t';  outStd  << stddists[r][l] << '\t';}
540                 out << endl;  outStd << endl;
541             }else if (outputForm == "square") {
542                 out << name << '\t';
543                 outStd << name << '\t';
544                 
545                 //output distances
546                 for (int l = 0; l < m->getNumGroups(); l++) {   out  << avedists[r][l] << '\t'; outStd  << stddists[r][l] << '\t'; }
547                 out << endl; outStd << endl;
548             }else{
549                 //output distances
550                 for (int l = 0; l < r; l++) {   
551                     string otherName = (m->getGroups())[l];
552                     if (otherName.length() < 10) { //pad with spaces to make compatible
553                         while (otherName.length() < 10) {  otherName += " ";  }
554                     }
555                     
556                     out  << name << '\t' << otherName << avedists[r][l] << endl;  
557                     outStd  << name << '\t' << otherName << stddists[r][l] << endl; 
558                 }
559             }
560         }
561         out.close();
562         outStd.close();
563         
564         return 0;
565     }
566         catch(exception& e) {
567                 m->errorOut(e, "UnifracUnweightedCommand", "getAverageSTDMatrices");
568                 exit(1);
569         }
570 }
571
572 /**************************************************************************************************/
573 int UnifracUnweightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
574         try {
575         
576         //used in tree constructor 
577         m->runParse = false;
578         
579         //create treemap class from groupmap for tree class to use
580         TreeMap newTmap;
581         newTmap.makeSim(m->getGroups());
582         
583         //clear  old tree names if any
584         m->Treenames.clear();
585         
586         //fills globaldatas tree names
587         m->Treenames = m->getGroups();
588         
589         vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
590         
591         if (m->control_pressed) { return 0; }
592         
593         Consensus con;
594         Tree* conTree = con.getTree(newTrees);
595         
596         //create a new filename
597         string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.cons." + getOutputFileNameTag("tree");                             
598         outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
599         ofstream outTree;
600         m->openOutputFile(conFile, outTree);
601         
602         if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
603         outTree.close();
604         
605         return 0;
606         
607     }
608         catch(exception& e) {
609                 m->errorOut(e, "UnifracUnweightedCommand", "getConsensusTrees");
610                 exit(1);
611         }
612 }
613 /**************************************************************************************************/
614
615 vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
616         try {
617         
618         vector<Tree*> trees;
619         
620         //create a new filename
621         string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.all." + getOutputFileNameTag("tree");                           
622         outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
623         
624         ofstream outAll;
625         m->openOutputFile(outputFile, outAll);
626         
627         
628         for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
629             
630             if (m->control_pressed) { break; }
631             
632             //make matrix with scores in it
633             vector< vector<double> > sims;      sims.resize(m->getNumGroups());
634             for (int j = 0; j < m->getNumGroups(); j++) {
635                 sims[j].resize(m->getNumGroups(), 0.0);
636             }
637             
638             int count = 0;
639                         for (int r=0; r<m->getNumGroups(); r++) { 
640                                 for (int l = 0; l < r; l++) {
641                     double sim = -(dists[i][count]-1.0);
642                                         sims[r][l] = sim;
643                                         sims[l][r] = sim;
644                                         count++;
645                                 }
646                         }
647             
648             //create tree
649             Tree* tempTree = new Tree(&mytmap, sims);
650             map<string, string> empty;
651             tempTree->assembleTree(empty);
652             
653             trees.push_back(tempTree);
654             
655             //print tree
656             tempTree->print(outAll);
657         }
658         
659         outAll.close();
660         
661         if (m->control_pressed) {  for (int i = 0; i < trees.size(); i++) {  delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
662         
663         return trees;
664     }
665         catch(exception& e) {
666                 m->errorOut(e, "UnifracUnweightedCommand", "buildTrees");
667                 exit(1);
668         }
669 }
670 /**************************************************************************************************/
671
672 int UnifracUnweightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
673         try {
674         vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
675         
676         Unweighted unweighted(includeRoot);
677         
678         //get unweighted scores for random trees - if random is false iters = 0
679         for (int j = 0; j < iters; j++) {
680             
681             //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison
682             randomData = unweighted.getValues(thisTree, "", "", processors, outputDir);
683             
684             if (m->control_pressed) { return 0; }
685                         
686             for(int k = 0; k < numComp; k++) {  
687                 //add trees unweighted score to map of scores
688                 map<float,float>::iterator it = rscoreFreq[k].find(randomData[k]);
689                 if (it != rscoreFreq[k].end()) {//already have that score
690                     rscoreFreq[k][randomData[k]]++;
691                 }else{//first time we have seen this score
692                     rscoreFreq[k][randomData[k]] = 1;
693                 }
694                                 
695                 //add randoms score to validscores
696                 validScores[randomData[k]] = randomData[k];
697             }
698         }
699         
700         for(int a = 0; a < numComp; a++) {
701             float rcumul = 1.0000;
702     
703             //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
704             for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
705                 //make rscoreFreq map and rCumul
706                 map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
707                 rCumul[a][it->first] = rcumul;
708                 //get percentage of random trees with that info
709                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
710                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
711             }
712             UWScoreSig[a].push_back(rCumul[a][usersScores[a]]);
713         }
714         
715         return 0;
716         }
717         catch(exception& e) {
718                 m->errorOut(e, "UnifracUnweightedCommand", "runRandomCalcs");
719                 exit(1);
720         }
721 }
722 /***********************************************************/
723 void UnifracUnweightedCommand::printUnweightedFile() {
724         try {
725                 vector<double> data;
726                 vector<string> tags;
727                 
728                 tags.push_back("Score");
729                 tags.push_back("RandFreq"); tags.push_back("RandCumul");
730                         
731                 for(int a = 0; a < numComp; a++) {
732                         output->initFile(groupComb[a], tags);
733                         //print each line
734                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
735                                 data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);                                             
736                                 output->output(data);
737                                 data.clear();
738                         } 
739                         output->resetFile();
740                 }
741         }
742         catch(exception& e) {
743                 m->errorOut(e, "UnifracUnweightedCommand", "printUnweightedFile");
744                 exit(1);
745         }
746 }
747
748 /***********************************************************/
749 void UnifracUnweightedCommand::printUWSummaryFile(int i) {
750         try {
751                                 
752                 //format output
753                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
754                         
755                 //print each line
756
757                 for(int a = 0; a < numComp; a++) {
758                         outSum << i+1 << '\t';
759                         m->mothurOut(toString(i+1) + "\t");
760                         
761                         if (random) {
762                                 if (UWScoreSig[a][0] > (1/(float)iters)) {
763                                         outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
764                                         cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl; 
765                                         m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0])  + "\t" + toString(UWScoreSig[a][0])+ "\n"); 
766                                 }else {
767                                         outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
768                                         cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
769                                         m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0])  + "\t<" + toString((1/float(iters))) + "\n"); 
770                                 }
771                         }else{
772                                 outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0]  << endl;
773                                 cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0]  << endl; 
774                                 m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0]) + "\n");
775                         }
776                 }
777                 
778         }
779         catch(exception& e) {
780                 m->errorOut(e, "UnifracUnweightedCommand", "printUWSummaryFile");
781                 exit(1);
782         }
783 }
784 /***********************************************************/
785 void UnifracUnweightedCommand::createPhylipFile(int i) {
786         try {
787                 string phylipFileName;
788                 if ((outputForm == "lt") || (outputForm == "square")) {
789                         phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted.phylip." + getOutputFileNameTag("phylip");
790                         outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
791                 }else { //column
792                         phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted.column." + getOutputFileNameTag("column");
793                         outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
794                 }
795                 
796                 ofstream out;
797                 m->openOutputFile(phylipFileName, out);
798                 
799                 if ((outputForm == "lt") || (outputForm == "square")) {
800                         //output numSeqs
801                         out << m->getNumGroups() << endl;
802                 }
803                 
804                 //make matrix with scores in it
805                 vector< vector<float> > dists;  dists.resize(m->getNumGroups());
806                 for (int i = 0; i < m->getNumGroups(); i++) {
807                         dists[i].resize(m->getNumGroups(), 0.0);
808                 }
809                 
810                 //flip it so you can print it
811                 int count = 0;
812                 for (int r=0; r<m->getNumGroups(); r++) { 
813                         for (int l = 0; l < r; l++) {
814                                 dists[r][l] = utreeScores[count][0];
815                                 dists[l][r] = utreeScores[count][0];
816                                 count++;
817                         }
818                 }
819                 
820                 //output to file
821                 for (int r=0; r<m->getNumGroups(); r++) { 
822                         //output name
823                         string name = (m->getGroups())[r];
824                         if (name.length() < 10) { //pad with spaces to make compatible
825                                 while (name.length() < 10) {  name += " ";  }
826                         }
827                         
828                         if (outputForm == "lt") {
829                                 out << name << '\t';
830                         
831                                 //output distances
832                                 for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
833                                 out << endl;
834                         }else if (outputForm == "square") {
835                                 out << name << '\t';
836                                 
837                                 //output distances
838                                 for (int l = 0; l < m->getNumGroups(); l++) {   out << dists[r][l] << '\t';  }
839                                 out << endl;
840                         }else{
841                                 //output distances
842                                 for (int l = 0; l < r; l++) {   
843                                         string otherName = (m->getGroups())[l];
844                                         if (otherName.length() < 10) { //pad with spaces to make compatible
845                                                 while (otherName.length() < 10) {  otherName += " ";  }
846                                         }
847                                         
848                                         out  << name << '\t' << otherName << dists[r][l] << endl;  
849                                 }
850                         }
851                 }
852                 out.close();
853         }
854         catch(exception& e) {
855                 m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile");
856                 exit(1);
857         }
858 }
859 /***********************************************************/
860
861
862
863