]> git.donarmstrong.com Git - mothur.git/blob - unifracunweightedcommand.cpp
added phylip as output file type for commands that output distance matrices. added...
[mothur.git] / unifracunweightedcommand.cpp
1 /*
2  *  unifracunweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracunweightedcommand.h"
11 #include "treereader.h"
12 #include "subsample.h"
13 #include "consensus.h"
14
15 //**********************************************************************************************************************
16 vector<string> UnifracUnweightedCommand::setParameters(){       
17         try {
18                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24                 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
25                 CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(pdistance);
26         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
27         CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
28         CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
29                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
31                 
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "UnifracUnweightedCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string UnifracUnweightedCommand::getHelpString(){       
43         try {
44                 string helpString = "";
45                 helpString += "The unifrac.unweighted command parameters are tree, group, name, groups, iters, distance, processors, root and random.  tree parameter is required unless you have valid current tree file.\n";
46                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n";
47                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
48                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n";
49                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n";
50                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
51                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52                 helpString += "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n";
53         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
54         helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results of the subsampling, as well as a consensus tree built from these trees. Default=F.\n";
55                 helpString += "Example unifrac.unweighted(groups=A-B-C, iters=500).\n";
56                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
57                 helpString += "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual.\n";
58                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "UnifracUnweightedCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 string UnifracUnweightedCommand::getOutputFileNameTag(string type, string inputName=""){        
68         try {
69         string outputFileName = "";
70                 map<string, vector<string> >::iterator it;
71         
72         //is this a type this command creates
73         it = outputTypes.find(type);
74         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
75         else {
76             if (type == "unweighted")            {   outputFileName =  "unweighted";   }
77             else if (type == "uwsummary")        {   outputFileName =  "uwsummary";   }
78             else if (type == "phylip")           {   outputFileName =  "dist";   }
79             else if (type == "column")           {   outputFileName =  "dist";   }
80             else if (type == "tree")             {   outputFileName =  "tre";   }
81             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
82         }
83         return outputFileName;
84         }
85         catch(exception& e) {
86                 m->errorOut(e, "UnifracUnweightedCommand", "getOutputFileNameTag");
87                 exit(1);
88         }
89 }
90
91 //**********************************************************************************************************************
92 UnifracUnweightedCommand::UnifracUnweightedCommand(){   
93         try {
94                 abort = true; calledHelp = true; 
95                 setParameters();
96                 vector<string> tempOutNames;
97                 outputTypes["unweighted"] = tempOutNames;
98                 outputTypes["uwsummary"] = tempOutNames;
99                 outputTypes["phylip"] = tempOutNames;
100                 outputTypes["column"] = tempOutNames;
101         outputTypes["tree"] = tempOutNames;
102         }
103         catch(exception& e) {
104                 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
105                 exit(1);
106         }
107 }
108 /***********************************************************/
109 UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
110         try {
111                 abort = false; calledHelp = false;   
112                 
113                         
114                 //allow user to run help
115                 if(option == "help") { help(); abort = true; calledHelp = true; }
116                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
117                 
118                 else {
119                         vector<string> myArray = setParameters();
120                         
121                         OptionParser parser(option);
122                         map<string,string> parameters = parser.getParameters();
123                         map<string,string>::iterator it;
124                         
125                         ValidParameters validParameter;
126                 
127                         //check to make sure all parameters are valid for command
128                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
129                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
130                         }
131                         
132                         //initialize outputTypes
133                         vector<string> tempOutNames;
134                         outputTypes["unweighted"] = tempOutNames;
135                         outputTypes["uwsummary"] = tempOutNames;
136                         outputTypes["phylip"] = tempOutNames;
137                         outputTypes["column"] = tempOutNames;
138             outputTypes["tree"] = tempOutNames;
139                         
140                         //if the user changes the input directory command factory will send this info to us in the output parameter 
141                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
142                         if (inputDir == "not found"){   inputDir = "";          }
143                         else {
144                                 string path;
145                                 it = parameters.find("tree");
146                                 //user has given a template file
147                                 if(it != parameters.end()){ 
148                                         path = m->hasPath(it->second);
149                                         //if the user has not given a path then, add inputdir. else leave path alone.
150                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
151                                 }
152                                 
153                                 it = parameters.find("group");
154                                 //user has given a template file
155                                 if(it != parameters.end()){ 
156                                         path = m->hasPath(it->second);
157                                         //if the user has not given a path then, add inputdir. else leave path alone.
158                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
159                                 }
160                                 
161                                 it = parameters.find("name");
162                                 //user has given a template file
163                                 if(it != parameters.end()){ 
164                                         path = m->hasPath(it->second);
165                                         //if the user has not given a path then, add inputdir. else leave path alone.
166                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
167                                 }
168                         }
169                         
170             //check for required parameters
171                         treefile = validParameter.validFile(parameters, "tree", true);
172                         if (treefile == "not open") { abort = true; }
173                         else if (treefile == "not found") {                             //if there is a current design file, use it
174                                 treefile = m->getTreeFile(); 
175                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
176                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
177                         }else { m->setTreeFile(treefile); }     
178                         
179                         //check for required parameters
180                         groupfile = validParameter.validFile(parameters, "group", true);
181                         if (groupfile == "not open") { abort = true; }
182                         else if (groupfile == "not found") { groupfile = ""; }
183                         else { m->setGroupFile(groupfile); }
184                         
185                         namefile = validParameter.validFile(parameters, "name", true);
186                         if (namefile == "not open") { namefile = ""; abort = true; }
187                         else if (namefile == "not found") { namefile = ""; }
188                         else { m->setNameFile(namefile); }
189                         
190                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
191                         
192                         //check for optional parameter and set defaults
193                         // ...at some point should added some additional type checking...
194                         groups = validParameter.validFile(parameters, "groups", false);                 
195                         if (groups == "not found") { groups = ""; }
196                         else { 
197                                 m->splitAtDash(groups, Groups);
198                                 m->setGroups(Groups);
199                         }
200                                 
201                         itersString = validParameter.validFile(parameters, "iters", false);                             if (itersString == "not found") { itersString = "1000"; }
202                         m->mothurConvert(itersString, iters); 
203                         
204                         string temp = validParameter.validFile(parameters, "distance", false);                  
205                         if (temp == "not found") { phylip = false; outputForm = ""; }
206                         else{
207                 if (temp=="phylip") { temp = "lt"; }
208                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
209                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
210                         }
211                         
212                         temp = validParameter.validFile(parameters, "random", false);                                   if (temp == "not found") { temp = "f"; }
213                         random = m->isTrue(temp);
214                         
215                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
216                         includeRoot = m->isTrue(temp);
217                         
218                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
219                         m->setProcessors(temp);
220                         m->mothurConvert(temp, processors); 
221                         
222             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
223                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
224             else {  
225                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
226                 else { subsample = false; }
227             }
228                         
229             if (!subsample) { subsampleIters = 0;   }
230             else { subsampleIters = iters;          }
231             
232             temp = validParameter.validFile(parameters, "consensus", false);                                    if (temp == "not found") { temp = "F"; }
233                         consensus = m->isTrue(temp);
234             
235                         if (subsample && random) {  m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true;  } 
236                         if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } 
237             if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
238             if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
239
240                         if (!random) {  iters = 0;  } //turn off random calcs
241                         
242                         //if user selects distance = true and no groups it won't calc the pairwise
243                         if ((phylip) && (Groups.size() == 0)) {
244                                 groups = "all";
245                                 m->splitAtDash(groups, Groups);
246                                 m->setGroups(Groups);
247                         }
248                         
249                         if (namefile == "") {
250                                 vector<string> files; files.push_back(treefile);
251                                 parser.getNameFile(files);
252                         }
253                 }
254                 
255         }
256         catch(exception& e) {
257                 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
258                 exit(1);
259         }
260 }
261
262 /***********************************************************/
263 int UnifracUnweightedCommand::execute() {
264         try {
265                 
266                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
267                 
268                 m->setTreeFile(treefile);
269                 
270                 TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
271         T = reader->getTrees();
272         tmap = T[0]->getTreeMap();
273         map<string, string> nameMap = reader->getNames();
274         map<string, string> unique2Dup = reader->getNameMap();
275         delete reader;  
276         
277                 sumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("uwsummary");
278                 outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
279                 m->openOutputFile(sumFile, outSum);
280                 
281                 SharedUtil util;
282                 Groups = m->getGroups();
283                 vector<string> namesGroups = tmap->getNamesOfGroups();
284                 util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted");        //sets the groups the user wants to analyze
285                 
286                 Unweighted unweighted(includeRoot);
287                 
288                 int start = time(NULL);
289         
290         //set or check size
291         if (subsample) {
292             //user has not set size, set size = smallest samples size
293             if (subsampleSize == -1) { 
294                 vector<string> temp; temp.push_back(Groups[0]);
295                 subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
296                 for (int i = 1; i < Groups.size(); i++) {
297                     temp.clear(); temp.push_back(Groups[i]);
298                     int thisSize = (tmap->getNamesSeqs(temp)).size();
299                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
300                 }
301                 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
302             }else { //eliminate any too small groups
303                 vector<string> newGroups = Groups;
304                 Groups.clear();
305                 for (int i = 0; i < newGroups.size(); i++) {
306                     vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
307                     vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
308                     int thisSize = thisGroupsSeqs.size();
309                     
310                     if (thisSize >= subsampleSize) {    Groups.push_back(newGroups[i]); }
311                     else {   m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
312                 } 
313                 m->setGroups(Groups);
314             }
315         }
316                 
317         util.getCombos(groupComb, Groups, numComp);
318                 m->setGroups(Groups);
319         
320                 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
321         
322                 if (numComp < processors) { processors = numComp;  }
323         
324         if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
325                 
326                 outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "UWScore" <<'\t';
327                 m->mothurOut("Tree#\tGroups\tUWScore\t");
328                 if (random) { outSum << "UWSig"; m->mothurOut("UWSig"); }
329                 outSum << endl; m->mothurOutEndLine();
330          
331                 //get pscores for users trees
332                 for (int i = 0; i < T.size(); i++) {
333                         if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
334                         
335             counter = 0;
336                         
337                         if (random)  {  
338                                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("unweighted"), itersString);
339                                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("unweighted"));
340                                 outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("unweighted"));
341                         }
342                         
343                         
344                         //get unweighted for users tree
345                         rscoreFreq.resize(numComp);  
346                         rCumul.resize(numComp);  
347                         utreeScores.resize(numComp);  
348                         UWScoreSig.resize(numComp); 
349             
350             vector<double> userData; userData.resize(numComp,0);  //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
351
352                         userData = unweighted.getValues(T[i], processors, outputDir);  //userData[0] = unweightedscore
353                 
354                         if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close();  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  }return 0; }
355                         
356                         //output scores for each combination
357                         for(int k = 0; k < numComp; k++) {
358                                 //saves users score
359                                 utreeScores[k].push_back(userData[k]);
360                                 
361                                 //add users score to validscores
362                                 validScores[userData[k]] = userData[k];
363                 
364                 if (!random) { UWScoreSig[k].push_back(0.0);    }
365                         }
366             
367             if (random) {  runRandomCalcs(T[i], userData);  }
368                         
369                         if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0;  }
370             
371             int startSubsample = time(NULL);
372             
373             //subsample loop
374             vector< vector<double> > calcDistsTotals;  //each iter, each groupCombos dists. this will be used to make .dist files
375             for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
376                 if (m->control_pressed) { break; }
377                 
378                 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
379                 TreeMap* newTmap = new TreeMap();
380                 //newTmap->getCopy(*tmap);
381                 
382                 //SubSample sample;
383                 //Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
384                 
385                 //uses method of setting groups to doNotIncludeMe
386                 SubSample sample;
387                 Tree* subSampleTree = sample.getSample(T[i], tmap, newTmap, subsampleSize, unique2Dup);
388                 
389                 //call new weighted function
390                 vector<double> iterData; iterData.resize(numComp,0);
391                 Unweighted thisUnweighted(includeRoot);
392                 iterData = thisUnweighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
393                 
394                 //save data to make ave dist, std dist
395                 calcDistsTotals.push_back(iterData);
396                 
397                 delete newTmap;
398                 delete subSampleTree;
399                 
400                 if((thisIter+1) % 100 == 0){    m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine();              }
401             }
402             m->mothurOut("It took " + toString(time(NULL) - startSubsample) + " secs to run the subsampling."); m->mothurOutEndLine();
403             
404             if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]);  } return 0;  }
405
406             if (subsample) {  getAverageSTDMatrices(calcDistsTotals, i); }
407             if (consensus) {  getConsensusTrees(calcDistsTotals, i);  }
408             
409             //print output files
410                         printUWSummaryFile(i);
411                         if (random)  {  printUnweightedFile();  delete output;  }
412                         if (phylip) {   createPhylipFile(i);            }
413                         
414                         rscoreFreq.clear(); 
415                         rCumul.clear();  
416                         validScores.clear(); 
417                         utreeScores.clear();  
418                         UWScoreSig.clear(); 
419                 }
420                 
421
422                 outSum.close();
423                 delete tmap; 
424                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
425                 
426                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }     return 0; }
427                 
428                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
429                 
430                 //set phylip file as new current phylipfile
431                 string current = "";
432                 itTypes = outputTypes.find("phylip");
433                 if (itTypes != outputTypes.end()) {
434                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
435                 }
436                 
437                 //set column file as new current columnfile
438                 itTypes = outputTypes.find("column");
439                 if (itTypes != outputTypes.end()) {
440                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
441                 }
442                 
443                 m->mothurOutEndLine();
444                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
445                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
446                 m->mothurOutEndLine();
447                 
448                 return 0;
449                 
450         }
451         catch(exception& e) {
452                 m->errorOut(e, "UnifracUnweightedCommand", "execute");
453                 exit(1);
454         }
455 }
456 /**************************************************************************************************/
457 int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
458         try {
459         //we need to find the average distance and standard deviation for each groups distance
460         
461         //finds sum
462         vector<double> averages; averages.resize(numComp, 0); 
463         for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
464             for (int i = 0; i < dists[thisIter].size(); i++) {  
465                 averages[i] += dists[thisIter][i];
466             }
467         }
468         
469         //finds average.
470         for (int i = 0; i < averages.size(); i++) {  averages[i] /= (float) subsampleIters; }
471         
472         //find standard deviation
473         vector<double> stdDev; stdDev.resize(numComp, 0);
474         
475         for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
476             for (int j = 0; j < dists[thisIter].size(); j++) {
477                 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
478             }
479         }
480         for (int i = 0; i < stdDev.size(); i++) {  
481             stdDev[i] /= (float) subsampleIters; 
482             stdDev[i] = sqrt(stdDev[i]);
483         }
484         
485         //make matrix with scores in it
486         vector< vector<double> > avedists;      avedists.resize(m->getNumGroups());
487         for (int i = 0; i < m->getNumGroups(); i++) {
488             avedists[i].resize(m->getNumGroups(), 0.0);
489         }
490         
491         //make matrix with scores in it
492         vector< vector<double> > stddists;      stddists.resize(m->getNumGroups());
493         for (int i = 0; i < m->getNumGroups(); i++) {
494             stddists[i].resize(m->getNumGroups(), 0.0);
495         }
496         
497         //flip it so you can print it
498         int count = 0;
499         for (int r=0; r<m->getNumGroups(); r++) { 
500             for (int l = 0; l < r; l++) {
501                 avedists[r][l] = averages[count];
502                 avedists[l][r] = averages[count];
503                 stddists[r][l] = stdDev[count];
504                 stddists[l][r] = stdDev[count];
505                 count++;
506             }
507         }
508         
509         string aveFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".unweighted.ave." + getOutputFileNameTag("phylip");
510         if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);  }
511         else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName);  }
512         ofstream out;
513         m->openOutputFile(aveFileName, out);
514         
515         string stdFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".unweighted.std." + getOutputFileNameTag("phylip");
516         if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
517         else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
518         ofstream outStd;
519         m->openOutputFile(stdFileName, outStd);
520         
521         if ((outputForm == "lt") || (outputForm == "square")) {
522             //output numSeqs
523             out << m->getNumGroups() << endl;
524             outStd << m->getNumGroups() << endl;
525         }
526         
527         //output to file
528         for (int r=0; r<m->getNumGroups(); r++) { 
529             //output name
530             string name = (m->getGroups())[r];
531             if (name.length() < 10) { //pad with spaces to make compatible
532                 while (name.length() < 10) {  name += " ";  }
533             }
534             
535             if (outputForm == "lt") {
536                 out << name << '\t';
537                 outStd << name << '\t';
538                 
539                 //output distances
540                 for (int l = 0; l < r; l++) {   out  << avedists[r][l] << '\t';  outStd  << stddists[r][l] << '\t';}
541                 out << endl;  outStd << endl;
542             }else if (outputForm == "square") {
543                 out << name << '\t';
544                 outStd << name << '\t';
545                 
546                 //output distances
547                 for (int l = 0; l < m->getNumGroups(); l++) {   out  << avedists[r][l] << '\t'; outStd  << stddists[r][l] << '\t'; }
548                 out << endl; outStd << endl;
549             }else{
550                 //output distances
551                 for (int l = 0; l < r; l++) {   
552                     string otherName = (m->getGroups())[l];
553                     if (otherName.length() < 10) { //pad with spaces to make compatible
554                         while (otherName.length() < 10) {  otherName += " ";  }
555                     }
556                     
557                     out  << name << '\t' << otherName << avedists[r][l] << endl;  
558                     outStd  << name << '\t' << otherName << stddists[r][l] << endl; 
559                 }
560             }
561         }
562         out.close();
563         outStd.close();
564         
565         return 0;
566     }
567         catch(exception& e) {
568                 m->errorOut(e, "UnifracUnweightedCommand", "getAverageSTDMatrices");
569                 exit(1);
570         }
571 }
572
573 /**************************************************************************************************/
574 int UnifracUnweightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
575         try {
576         
577         //used in tree constructor 
578         m->runParse = false;
579         
580         //create treemap class from groupmap for tree class to use
581         TreeMap newTmap;
582         newTmap.makeSim(m->getGroups());
583         
584         //clear  old tree names if any
585         m->Treenames.clear();
586         
587         //fills globaldatas tree names
588         m->Treenames = m->getGroups();
589         
590         vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
591         
592         if (m->control_pressed) { return 0; }
593         
594         Consensus con;
595         Tree* conTree = con.getTree(newTrees);
596         
597         //create a new filename
598         string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.cons." + getOutputFileNameTag("tree");                             
599         outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
600         ofstream outTree;
601         m->openOutputFile(conFile, outTree);
602         
603         if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
604         outTree.close();
605         
606         return 0;
607         
608     }
609         catch(exception& e) {
610                 m->errorOut(e, "UnifracUnweightedCommand", "getConsensusTrees");
611                 exit(1);
612         }
613 }
614 /**************************************************************************************************/
615
616 vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
617         try {
618         
619         vector<Tree*> trees;
620         
621         //create a new filename
622         string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.all." + getOutputFileNameTag("tree");                           
623         outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
624         
625         ofstream outAll;
626         m->openOutputFile(outputFile, outAll);
627         
628         
629         for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
630             
631             if (m->control_pressed) { break; }
632             
633             //make matrix with scores in it
634             vector< vector<double> > sims;      sims.resize(m->getNumGroups());
635             for (int j = 0; j < m->getNumGroups(); j++) {
636                 sims[j].resize(m->getNumGroups(), 0.0);
637             }
638             
639             int count = 0;
640                         for (int r=0; r<m->getNumGroups(); r++) { 
641                                 for (int l = 0; l < r; l++) {
642                     double sim = -(dists[i][count]-1.0);
643                                         sims[r][l] = sim;
644                                         sims[l][r] = sim;
645                                         count++;
646                                 }
647                         }
648             
649             //create tree
650             Tree* tempTree = new Tree(&mytmap, sims);
651             map<string, string> empty;
652             tempTree->assembleTree(empty);
653             
654             trees.push_back(tempTree);
655             
656             //print tree
657             tempTree->print(outAll);
658         }
659         
660         outAll.close();
661         
662         if (m->control_pressed) {  for (int i = 0; i < trees.size(); i++) {  delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
663         
664         return trees;
665     }
666         catch(exception& e) {
667                 m->errorOut(e, "UnifracUnweightedCommand", "buildTrees");
668                 exit(1);
669         }
670 }
671 /**************************************************************************************************/
672
673 int UnifracUnweightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
674         try {
675         vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
676         
677         Unweighted unweighted(includeRoot);
678         
679         //get unweighted scores for random trees - if random is false iters = 0
680         for (int j = 0; j < iters; j++) {
681             
682             //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison
683             randomData = unweighted.getValues(thisTree, "", "", processors, outputDir);
684             
685             if (m->control_pressed) { return 0; }
686                         
687             for(int k = 0; k < numComp; k++) {  
688                 //add trees unweighted score to map of scores
689                 map<float,float>::iterator it = rscoreFreq[k].find(randomData[k]);
690                 if (it != rscoreFreq[k].end()) {//already have that score
691                     rscoreFreq[k][randomData[k]]++;
692                 }else{//first time we have seen this score
693                     rscoreFreq[k][randomData[k]] = 1;
694                 }
695                                 
696                 //add randoms score to validscores
697                 validScores[randomData[k]] = randomData[k];
698             }
699         }
700         
701         for(int a = 0; a < numComp; a++) {
702             float rcumul = 1.0000;
703     
704             //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
705             for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
706                 //make rscoreFreq map and rCumul
707                 map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
708                 rCumul[a][it->first] = rcumul;
709                 //get percentage of random trees with that info
710                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
711                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
712             }
713             UWScoreSig[a].push_back(rCumul[a][usersScores[a]]);
714         }
715         
716         return 0;
717         }
718         catch(exception& e) {
719                 m->errorOut(e, "UnifracUnweightedCommand", "runRandomCalcs");
720                 exit(1);
721         }
722 }
723 /***********************************************************/
724 void UnifracUnweightedCommand::printUnweightedFile() {
725         try {
726                 vector<double> data;
727                 vector<string> tags;
728                 
729                 tags.push_back("Score");
730                 tags.push_back("RandFreq"); tags.push_back("RandCumul");
731                         
732                 for(int a = 0; a < numComp; a++) {
733                         output->initFile(groupComb[a], tags);
734                         //print each line
735                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
736                                 data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);                                             
737                                 output->output(data);
738                                 data.clear();
739                         } 
740                         output->resetFile();
741                 }
742         }
743         catch(exception& e) {
744                 m->errorOut(e, "UnifracUnweightedCommand", "printUnweightedFile");
745                 exit(1);
746         }
747 }
748
749 /***********************************************************/
750 void UnifracUnweightedCommand::printUWSummaryFile(int i) {
751         try {
752                                 
753                 //format output
754                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
755                         
756                 //print each line
757
758                 for(int a = 0; a < numComp; a++) {
759                         outSum << i+1 << '\t';
760                         m->mothurOut(toString(i+1) + "\t");
761                         
762                         if (random) {
763                                 if (UWScoreSig[a][0] > (1/(float)iters)) {
764                                         outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
765                                         cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl; 
766                                         m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0])  + "\t" + toString(UWScoreSig[a][0])+ "\n"); 
767                                 }else {
768                                         outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
769                                         cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
770                                         m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0])  + "\t<" + toString((1/float(iters))) + "\n"); 
771                                 }
772                         }else{
773                                 outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0]  << endl;
774                                 cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0]  << endl; 
775                                 m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0]) + "\n");
776                         }
777                 }
778                 
779         }
780         catch(exception& e) {
781                 m->errorOut(e, "UnifracUnweightedCommand", "printUWSummaryFile");
782                 exit(1);
783         }
784 }
785 /***********************************************************/
786 void UnifracUnweightedCommand::createPhylipFile(int i) {
787         try {
788                 string phylipFileName;
789                 if ((outputForm == "lt") || (outputForm == "square")) {
790                         phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted.phylip." + getOutputFileNameTag("phylip");
791                         outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
792                 }else { //column
793                         phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted.column." + getOutputFileNameTag("column");
794                         outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
795                 }
796                 
797                 ofstream out;
798                 m->openOutputFile(phylipFileName, out);
799                 
800                 if ((outputForm == "lt") || (outputForm == "square")) {
801                         //output numSeqs
802                         out << m->getNumGroups() << endl;
803                 }
804                 
805                 //make matrix with scores in it
806                 vector< vector<float> > dists;  dists.resize(m->getNumGroups());
807                 for (int i = 0; i < m->getNumGroups(); i++) {
808                         dists[i].resize(m->getNumGroups(), 0.0);
809                 }
810                 
811                 //flip it so you can print it
812                 int count = 0;
813                 for (int r=0; r<m->getNumGroups(); r++) { 
814                         for (int l = 0; l < r; l++) {
815                                 dists[r][l] = utreeScores[count][0];
816                                 dists[l][r] = utreeScores[count][0];
817                                 count++;
818                         }
819                 }
820                 
821                 //output to file
822                 for (int r=0; r<m->getNumGroups(); r++) { 
823                         //output name
824                         string name = (m->getGroups())[r];
825                         if (name.length() < 10) { //pad with spaces to make compatible
826                                 while (name.length() < 10) {  name += " ";  }
827                         }
828                         
829                         if (outputForm == "lt") {
830                                 out << name << '\t';
831                         
832                                 //output distances
833                                 for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
834                                 out << endl;
835                         }else if (outputForm == "square") {
836                                 out << name << '\t';
837                                 
838                                 //output distances
839                                 for (int l = 0; l < m->getNumGroups(); l++) {   out << dists[r][l] << '\t';  }
840                                 out << endl;
841                         }else{
842                                 //output distances
843                                 for (int l = 0; l < r; l++) {   
844                                         string otherName = (m->getGroups())[l];
845                                         if (otherName.length() < 10) { //pad with spaces to make compatible
846                                                 while (otherName.length() < 10) {  otherName += " ";  }
847                                         }
848                                         
849                                         out  << name << '\t' << otherName << dists[r][l] << endl;  
850                                 }
851                         }
852                 }
853                 out.close();
854         }
855         catch(exception& e) {
856                 m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile");
857                 exit(1);
858         }
859 }
860 /***********************************************************/
861
862
863
864