]> git.donarmstrong.com Git - mothur.git/blob - unifracunweightedcommand.cpp
changes while testing
[mothur.git] / unifracunweightedcommand.cpp
1 /*
2  *  unifracunweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracunweightedcommand.h"
11 #include "treereader.h"
12 #include "subsample.h"
13 #include "consensus.h"
14
15 //**********************************************************************************************************************
16 vector<string> UnifracUnweightedCommand::setParameters(){       
17         try {
18                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","unweighted-uwsummary",false,true,true); parameters.push_back(ptree);
19         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
20         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
22                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
23                 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25                 CommandParameter prandom("random", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prandom);
26                 CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "","phylip-column",false,false); parameters.push_back(pdistance);
27         CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
28         CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "","tree",false,false); parameters.push_back(pconsensus);
29         CommandParameter proot("root", "Boolean", "F", "", "", "", "","",false,false); parameters.push_back(proot);
30                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
31                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
32                 
33                 vector<string> myArray;
34                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
35                 return myArray;
36         }
37         catch(exception& e) {
38                 m->errorOut(e, "UnifracUnweightedCommand", "setParameters");
39                 exit(1);
40         }
41 }
42 //**********************************************************************************************************************
43 string UnifracUnweightedCommand::getHelpString(){       
44         try {
45                 string helpString = "";
46                 helpString += "The unifrac.unweighted command parameters are tree, group, name, count, groups, iters, distance, processors, root and random.  tree parameter is required unless you have valid current tree file.\n";
47                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n";
48                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
49                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n";
50                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n";
51                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
52                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
53                 helpString += "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n";
54         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
55         helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results of the subsampling, as well as a consensus tree built from these trees. Default=F.\n";
56                 helpString += "Example unifrac.unweighted(groups=A-B-C, iters=500).\n";
57                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
58                 helpString += "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual.\n";
59                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
60                 return helpString;
61         }
62         catch(exception& e) {
63                 m->errorOut(e, "UnifracUnweightedCommand", "getHelpString");
64                 exit(1);
65         }
66 }
67 //**********************************************************************************************************************
68 string UnifracUnweightedCommand::getOutputPattern(string type) {
69     try {
70         string pattern = "";
71         if (type == "unweighted")            {  pattern = "[filename],unweighted-[filename],[tag],unweighted";   }
72         else if (type == "uwsummary")        {  pattern = "[filename],uwsummary";   }
73         else if (type == "phylip")           {  pattern = "[filename],[tag],[tag2],dist";   }
74         else if (type == "column")           {  pattern = "[filename],[tag],[tag2],dist";   }
75         else if (type == "tree")             {  pattern = "[filename],[tag],[tag2],tre";   }
76         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
77         
78         return pattern;
79     }
80     catch(exception& e) {
81         m->errorOut(e, "UnifracUnweightedCommand", "getOutputPattern");
82         exit(1);
83     }
84 }
85 //**********************************************************************************************************************
86 UnifracUnweightedCommand::UnifracUnweightedCommand(){   
87         try {
88                 abort = true; calledHelp = true; 
89                 setParameters();
90                 vector<string> tempOutNames;
91                 outputTypes["unweighted"] = tempOutNames;
92                 outputTypes["uwsummary"] = tempOutNames;
93                 outputTypes["phylip"] = tempOutNames;
94                 outputTypes["column"] = tempOutNames;
95         outputTypes["tree"] = tempOutNames;
96         }
97         catch(exception& e) {
98                 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
99                 exit(1);
100         }
101 }
102 /***********************************************************/
103 UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
104         try {
105                 abort = false; calledHelp = false;   
106                 
107                         
108                 //allow user to run help
109                 if(option == "help") { help(); abort = true; calledHelp = true; }
110                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
111                 
112                 else {
113                         vector<string> myArray = setParameters();
114                         
115                         OptionParser parser(option);
116                         map<string,string> parameters = parser.getParameters();
117                         map<string,string>::iterator it;
118                         
119                         ValidParameters validParameter;
120                 
121                         //check to make sure all parameters are valid for command
122                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
123                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
124                         }
125                         
126                         //initialize outputTypes
127                         vector<string> tempOutNames;
128                         outputTypes["unweighted"] = tempOutNames;
129                         outputTypes["uwsummary"] = tempOutNames;
130                         outputTypes["phylip"] = tempOutNames;
131                         outputTypes["column"] = tempOutNames;
132             outputTypes["tree"] = tempOutNames;
133                         
134                         //if the user changes the input directory command factory will send this info to us in the output parameter 
135                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
136                         if (inputDir == "not found"){   inputDir = "";          }
137                         else {
138                                 string path;
139                                 it = parameters.find("tree");
140                                 //user has given a template file
141                                 if(it != parameters.end()){ 
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
145                                 }
146                                 
147                                 it = parameters.find("group");
148                                 //user has given a template file
149                                 if(it != parameters.end()){ 
150                                         path = m->hasPath(it->second);
151                                         //if the user has not given a path then, add inputdir. else leave path alone.
152                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
153                                 }
154                                 
155                                 it = parameters.find("name");
156                                 //user has given a template file
157                                 if(it != parameters.end()){ 
158                                         path = m->hasPath(it->second);
159                                         //if the user has not given a path then, add inputdir. else leave path alone.
160                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
161                                 }
162                 
163                 it = parameters.find("count");
164                                 //user has given a template file
165                                 if(it != parameters.end()){ 
166                                         path = m->hasPath(it->second);
167                                         //if the user has not given a path then, add inputdir. else leave path alone.
168                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
169                                 }
170                         }
171                         
172             //check for required parameters
173                         treefile = validParameter.validFile(parameters, "tree", true);
174                         if (treefile == "not open") { abort = true; }
175                         else if (treefile == "not found") {                             //if there is a current design file, use it
176                                 treefile = m->getTreeFile(); 
177                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
178                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
179                         }else { m->setTreeFile(treefile); }     
180                         
181                         //check for required parameters
182                         groupfile = validParameter.validFile(parameters, "group", true);
183                         if (groupfile == "not open") { abort = true; }
184                         else if (groupfile == "not found") { groupfile = ""; }
185                         else { m->setGroupFile(groupfile); }
186                         
187                         namefile = validParameter.validFile(parameters, "name", true);
188                         if (namefile == "not open") { namefile = ""; abort = true; }
189                         else if (namefile == "not found") { namefile = ""; }
190                         else { m->setNameFile(namefile); }
191             
192             countfile = validParameter.validFile(parameters, "count", true);
193                         if (countfile == "not open") { countfile = ""; abort = true; }
194                         else if (countfile == "not found") { countfile = "";  } 
195                         else { m->setCountTableFile(countfile); }
196             
197             if ((namefile != "") && (countfile != "")) {
198                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
199             }
200                         
201             if ((groupfile != "") && (countfile != "")) {
202                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
203             }
204                         
205                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
206                         
207                         //check for optional parameter and set defaults
208                         // ...at some point should added some additional type checking...
209                         groups = validParameter.validFile(parameters, "groups", false);                 
210                         if (groups == "not found") { groups = ""; }
211                         else { 
212                                 m->splitAtDash(groups, Groups);
213                                 m->setGroups(Groups);
214                         }
215                                 
216                         itersString = validParameter.validFile(parameters, "iters", false);                             if (itersString == "not found") { itersString = "1000"; }
217                         m->mothurConvert(itersString, iters); 
218                         
219                         string temp = validParameter.validFile(parameters, "distance", false);                  
220                         if (temp == "not found") { phylip = false; outputForm = ""; }
221                         else{
222                 if (temp=="phylip") { temp = "lt"; }
223                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
224                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
225                         }
226                         
227                         temp = validParameter.validFile(parameters, "random", false);                                   if (temp == "not found") { temp = "f"; }
228                         random = m->isTrue(temp);
229                         
230                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
231                         includeRoot = m->isTrue(temp);
232                         
233                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
234                         m->setProcessors(temp);
235                         m->mothurConvert(temp, processors); 
236                         
237             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
238                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
239             else {  
240                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
241                 else { subsample = false; }
242             }
243                         
244             if (!subsample) { subsampleIters = 0;   }
245             else { subsampleIters = iters;          }
246             
247             temp = validParameter.validFile(parameters, "consensus", false);                                    if (temp == "not found") { temp = "F"; }
248                         consensus = m->isTrue(temp);
249             
250                         if (subsample && random) {  m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true;  } 
251             if (countfile == "") { if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } }
252             else {  
253                 CountTable testCt; 
254                 if ((!testCt.testGroups(countfile)) && (subsample)) {
255                     m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;  
256                 }
257             }
258             if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
259             if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
260
261                         if (!random) {  iters = 0;  } //turn off random calcs
262                         
263                         //if user selects distance = true and no groups it won't calc the pairwise
264                         if ((phylip) && (Groups.size() == 0)) {
265                                 groups = "all";
266                                 m->splitAtDash(groups, Groups);
267                                 m->setGroups(Groups);
268                         }
269                         
270                         if (countfile=="") {
271                 if (namefile == "") {
272                     vector<string> files; files.push_back(treefile);
273                     parser.getNameFile(files);
274                 } 
275             }
276                 }
277                 
278         }
279         catch(exception& e) {
280                 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
281                 exit(1);
282         }
283 }
284
285 /***********************************************************/
286 int UnifracUnweightedCommand::execute() {
287         try {
288                 
289                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
290                 
291                 m->setTreeFile(treefile);
292                 
293                 TreeReader* reader;
294         if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
295         else { reader = new TreeReader(treefile, countfile); }
296         T = reader->getTrees();
297         ct = T[0]->getCountTable();
298         delete reader;
299         
300         map<string, string> variables; 
301                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
302                 sumFile = getOutputFileName("uwsummary",variables);
303                 outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
304                 m->openOutputFile(sumFile, outSum);
305                 
306                 SharedUtil util;
307                 Groups = m->getGroups();
308                 vector<string> namesGroups = ct->getNamesOfGroups();
309                 util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted");        //sets the groups the user wants to analyze
310                 
311                 Unweighted unweighted(includeRoot);
312                 
313                 int start = time(NULL);
314         
315         //set or check size
316         if (subsample) {
317             //user has not set size, set size = smallest samples size
318             if (subsampleSize == -1) { 
319                 vector<string> temp; temp.push_back(Groups[0]);
320                 subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
321                 for (int i = 1; i < Groups.size(); i++) {
322                     int thisSize = ct->getGroupCount(Groups[i]);
323                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
324                 }
325                 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
326             }else { //eliminate any too small groups
327                 vector<string> newGroups = Groups;
328                 Groups.clear();
329                 for (int i = 0; i < newGroups.size(); i++) {
330                     int thisSize = ct->getGroupCount(newGroups[i]);
331                     
332                     if (thisSize >= subsampleSize) {    Groups.push_back(newGroups[i]); }
333                     else {   m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
334                 } 
335                 m->setGroups(Groups);
336             }
337         }
338                 
339         util.getCombos(groupComb, Groups, numComp);
340                 m->setGroups(Groups);
341         
342                 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
343         
344                 if (numComp < processors) { processors = numComp;  }
345         
346         if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
347                 
348                 outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "UWScore" <<'\t';
349                 m->mothurOut("Tree#\tGroups\tUWScore\t");
350                 if (random) { outSum << "UWSig"; m->mothurOut("UWSig"); }
351                 outSum << endl; m->mothurOutEndLine();
352          
353                 //get pscores for users trees
354                 for (int i = 0; i < T.size(); i++) {
355                         if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } return 0; }
356                         
357             counter = 0;
358                         
359                         if (random)  {  
360                 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
361                 variables["[tag]"] = toString(i+1);
362                 string unFileName = getOutputFileName("unweighted", variables);
363                                 output = new ColumnFile(unFileName, itersString);
364                                 outputNames.push_back(unFileName); outputTypes["unweighted"].push_back(unFileName);
365                         }
366                         
367                         
368                         //get unweighted for users tree
369                         rscoreFreq.resize(numComp);  
370                         rCumul.resize(numComp);  
371                         utreeScores.resize(numComp);  
372                         UWScoreSig.resize(numComp); 
373             
374             vector<double> userData; userData.resize(numComp,0);  //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
375
376                         userData = unweighted.getValues(T[i], processors, outputDir);  //userData[0] = unweightedscore
377                 
378                         if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close();  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);  }return 0; }
379                         
380                         //output scores for each combination
381                         for(int k = 0; k < numComp; k++) {
382                                 //saves users score
383                                 utreeScores[k].push_back(userData[k]);
384                                 
385                                 //add users score to validscores
386                                 validScores[userData[k]] = userData[k];
387                 
388                 if (!random) { UWScoreSig[k].push_back(0.0);    }
389                         }
390             
391             if (random) {  runRandomCalcs(T[i], userData);  }
392                         
393                         if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } return 0;  }
394             
395             int startSubsample = time(NULL);
396             
397             //subsample loop
398             vector< vector<double> > calcDistsTotals;  //each iter, each groupCombos dists. this will be used to make .dist files
399             for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
400                 if (m->control_pressed) { break; }
401                 
402                 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
403                 CountTable* newCt = new CountTable();
404                  
405                 //uses method of setting groups to doNotIncludeMe
406                 int sampleTime = 0;
407                 if (m->debug) { sampleTime = time(NULL); }
408                 SubSample sample;
409                 Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
410                 
411                 if (m->debug) { m->mothurOut("[DEBUG]: iter " + toString(thisIter) + " took " + toString(time(NULL) - sampleTime) + " seconds to sample tree.\n"); }
412                 
413                 //call new weighted function
414                 vector<double> iterData; iterData.resize(numComp,0);
415                 Unweighted thisUnweighted(includeRoot);
416                 iterData = thisUnweighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
417         
418                 //save data to make ave dist, std dist
419                 calcDistsTotals.push_back(iterData);
420                 
421                 delete newCt;
422                 delete subSampleTree;
423                 
424                 if((thisIter+1) % 100 == 0){    m->mothurOutJustToScreen(toString(thisIter+1)+"\n");            }
425             }
426             if (subsample) { m->mothurOut("It took " + toString(time(NULL) - startSubsample) + " secs to run the subsampling."); m->mothurOutEndLine(); }
427             
428             if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0;  }
429
430             if (subsample) {  getAverageSTDMatrices(calcDistsTotals, i); }
431             if (consensus) {  getConsensusTrees(calcDistsTotals, i);  }
432             
433             //print output files
434                         printUWSummaryFile(i);
435                         if (random)  {  printUnweightedFile();  delete output;  }
436                         if (phylip) {   createPhylipFile(i);            }
437                         
438                         rscoreFreq.clear(); 
439                         rCumul.clear();  
440                         validScores.clear(); 
441                         utreeScores.clear();  
442                         UWScoreSig.clear(); 
443                 }
444                 
445
446                 outSum.close();
447                 delete ct; 
448                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
449                 
450                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }     return 0; }
451                 
452                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
453                 
454                 //set phylip file as new current phylipfile
455                 string current = "";
456                 itTypes = outputTypes.find("phylip");
457                 if (itTypes != outputTypes.end()) {
458                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
459                 }
460                 
461                 //set column file as new current columnfile
462                 itTypes = outputTypes.find("column");
463                 if (itTypes != outputTypes.end()) {
464                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
465                 }
466                 
467                 m->mothurOutEndLine();
468                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
469                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
470                 m->mothurOutEndLine();
471                 
472                 return 0;
473                 
474         }
475         catch(exception& e) {
476                 m->errorOut(e, "UnifracUnweightedCommand", "execute");
477                 exit(1);
478         }
479 }
480 /**************************************************************************************************/
481 int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
482         try {
483         //we need to find the average distance and standard deviation for each groups distance
484         //finds sum
485         vector<double> averages = m->getAverages(dists);
486         
487         //find standard deviation
488         vector<double> stdDev = m->getStandardDeviation(dists, averages);
489         
490         //make matrix with scores in it
491         vector< vector<double> > avedists;      //avedists.resize(m->getNumGroups());
492         for (int i = 0; i < m->getNumGroups(); i++) {
493             vector<double> temp;
494             for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
495             avedists.push_back(temp);
496         }
497         
498         //make matrix with scores in it
499         vector< vector<double> > stddists;      //stddists.resize(m->getNumGroups());
500         for (int i = 0; i < m->getNumGroups(); i++) {
501             vector<double> temp;
502             for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
503             //stddists[i].resize(m->getNumGroups(), 0.0);
504             stddists.push_back(temp);
505         }
506         
507         if (m->debug) { m->mothurOut("[DEBUG]: about to fill matrix.\n"); }
508         
509         //flip it so you can print it
510         int count = 0;
511         for (int r=0; r<m->getNumGroups(); r++) { 
512             for (int l = 0; l < r; l++) {
513                 avedists[r][l] = averages[count];
514                 avedists[l][r] = averages[count];
515                 stddists[r][l] = stdDev[count];
516                 stddists[l][r] = stdDev[count];
517                 count++;
518             }
519         }
520         
521         if (m->debug) { m->mothurOut("[DEBUG]: done filling matrix.\n"); }
522         
523         map<string, string> variables; 
524                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
525         variables["[tag]"] = toString(treeNum+1);
526         variables["[tag2]"] = "unweighted.ave";
527         string aveFileName = getOutputFileName("phylip",variables);
528         if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);  }
529         else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName);  }
530         ofstream out;
531         m->openOutputFile(aveFileName, out);
532         
533         variables["[tag2]"] = "unweighted.std";
534         string stdFileName = getOutputFileName("phylip",variables);
535         if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
536         else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
537         ofstream outStd;
538         m->openOutputFile(stdFileName, outStd);
539         
540         if ((outputForm == "lt") || (outputForm == "square")) {
541             //output numSeqs
542             out << m->getNumGroups() << endl;
543             outStd << m->getNumGroups() << endl;
544         }
545         
546         //output to file
547         for (int r=0; r<m->getNumGroups(); r++) { 
548             //output name
549             string name = (m->getGroups())[r];
550             if (name.length() < 10) { //pad with spaces to make compatible
551                 while (name.length() < 10) {  name += " ";  }
552             }
553             
554             if (outputForm == "lt") {
555                 out << name << '\t';
556                 outStd << name << '\t';
557                 
558                 //output distances
559                 for (int l = 0; l < r; l++) {   out  << avedists[r][l] << '\t';  outStd  << stddists[r][l] << '\t';}
560                 out << endl;  outStd << endl;
561             }else if (outputForm == "square") {
562                 out << name << '\t';
563                 outStd << name << '\t';
564                 
565                 //output distances
566                 for (int l = 0; l < m->getNumGroups(); l++) {   out  << avedists[r][l] << '\t'; outStd  << stddists[r][l] << '\t'; }
567                 out << endl; outStd << endl;
568             }else{
569                 //output distances
570                 for (int l = 0; l < r; l++) {   
571                     string otherName = (m->getGroups())[l];
572                     if (otherName.length() < 10) { //pad with spaces to make compatible
573                         while (otherName.length() < 10) {  otherName += " ";  }
574                     }
575                     
576                     out  << name << '\t' << otherName << avedists[r][l] << endl;  
577                     outStd  << name << '\t' << otherName << stddists[r][l] << endl; 
578                 }
579             }
580         }
581         out.close();
582         outStd.close();
583         
584         return 0;
585     }
586         catch(exception& e) {
587                 m->errorOut(e, "UnifracUnweightedCommand", "getAverageSTDMatrices");
588                 exit(1);
589         }
590 }
591
592 /**************************************************************************************************/
593 int UnifracUnweightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
594         try {
595         
596         //used in tree constructor 
597         m->runParse = false;
598         
599         //create treemap class from groupmap for tree class to use
600         CountTable newCt;
601         set<string> nameMap;
602         map<string, string> groupMap;
603         set<string> gps;
604         for (int i = 0; i < m->getGroups().size(); i++) { 
605             nameMap.insert(m->getGroups()[i]); 
606             gps.insert(m->getGroups()[i]); 
607             groupMap[m->getGroups()[i]] = m->getGroups()[i];
608         }
609         newCt.createTable(nameMap, groupMap, gps);
610         
611         //clear  old tree names if any
612         m->Treenames.clear();
613         
614         //fills globaldatas tree names
615         m->Treenames = m->getGroups();
616         
617         vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
618         
619         if (m->control_pressed) { return 0; }
620         
621         Consensus con;
622         Tree* conTree = con.getTree(newTrees);
623         
624         //create a new filename
625         map<string, string> variables; 
626                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
627         variables["[tag]"] = toString(treeNum+1);
628         variables["[tag2]"] = "unweighted.cons";
629         string conFile = getOutputFileName("tree",variables);                           
630         outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
631         ofstream outTree;
632         m->openOutputFile(conFile, outTree);
633         
634         if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
635         outTree.close();
636         
637         return 0;
638         
639     }
640         catch(exception& e) {
641                 m->errorOut(e, "UnifracUnweightedCommand", "getConsensusTrees");
642                 exit(1);
643         }
644 }
645 /**************************************************************************************************/
646
647 vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
648         try {
649         
650         vector<Tree*> trees;
651         
652         //create a new filename
653         map<string, string> variables; 
654                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
655         variables["[tag]"] = toString(treeNum+1);
656         variables["[tag2]"] = "unweighted.all";
657         string outputFile = getOutputFileName("tree",variables);                                
658         outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
659         
660         ofstream outAll;
661         m->openOutputFile(outputFile, outAll);
662         
663         
664         for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
665             
666             if (m->control_pressed) { break; }
667             
668             //make matrix with scores in it
669             vector< vector<double> > sims;      sims.resize(m->getNumGroups());
670             for (int j = 0; j < m->getNumGroups(); j++) {
671                 sims[j].resize(m->getNumGroups(), 0.0);
672             }
673             
674             int count = 0;
675                         for (int r=0; r<m->getNumGroups(); r++) { 
676                                 for (int l = 0; l < r; l++) {
677                     double sim = -(dists[i][count]-1.0);
678                                         sims[r][l] = sim;
679                                         sims[l][r] = sim;
680                                         count++;
681                                 }
682                         }
683             
684             //create tree
685             Tree* tempTree = new Tree(&myct, sims);
686             tempTree->assembleTree();
687             
688             trees.push_back(tempTree);
689             
690             //print tree
691             tempTree->print(outAll);
692         }
693         
694         outAll.close();
695         
696         if (m->control_pressed) {  for (int i = 0; i < trees.size(); i++) {  delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
697         
698         return trees;
699     }
700         catch(exception& e) {
701                 m->errorOut(e, "UnifracUnweightedCommand", "buildTrees");
702                 exit(1);
703         }
704 }
705 /**************************************************************************************************/
706
707 int UnifracUnweightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
708         try {
709         vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
710         
711         Unweighted unweighted(includeRoot);
712         
713         //get unweighted scores for random trees - if random is false iters = 0
714         for (int j = 0; j < iters; j++) {
715             
716             //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison
717             randomData = unweighted.getValues(thisTree, "", "", processors, outputDir);
718             
719             if (m->control_pressed) { return 0; }
720                         
721             for(int k = 0; k < numComp; k++) {  
722                 //add trees unweighted score to map of scores
723                 map<float,float>::iterator it = rscoreFreq[k].find(randomData[k]);
724                 if (it != rscoreFreq[k].end()) {//already have that score
725                     rscoreFreq[k][randomData[k]]++;
726                 }else{//first time we have seen this score
727                     rscoreFreq[k][randomData[k]] = 1;
728                 }
729                                 
730                 //add randoms score to validscores
731                 validScores[randomData[k]] = randomData[k];
732             }
733         }
734         
735         for(int a = 0; a < numComp; a++) {
736             float rcumul = 1.0000;
737     
738             //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
739             for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
740                 //make rscoreFreq map and rCumul
741                 map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
742                 rCumul[a][it->first] = rcumul;
743                 //get percentage of random trees with that info
744                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
745                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
746             }
747             UWScoreSig[a].push_back(rCumul[a][usersScores[a]]);
748         }
749         
750         return 0;
751         }
752         catch(exception& e) {
753                 m->errorOut(e, "UnifracUnweightedCommand", "runRandomCalcs");
754                 exit(1);
755         }
756 }
757 /***********************************************************/
758 void UnifracUnweightedCommand::printUnweightedFile() {
759         try {
760                 vector<double> data;
761                 vector<string> tags;
762                 
763                 tags.push_back("Score");
764                 tags.push_back("RandFreq"); tags.push_back("RandCumul");
765                         
766                 for(int a = 0; a < numComp; a++) {
767                         output->initFile(groupComb[a], tags);
768                         //print each line
769                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
770                                 data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);                                             
771                                 output->output(data);
772                                 data.clear();
773                         } 
774                         output->resetFile();
775                 }
776         }
777         catch(exception& e) {
778                 m->errorOut(e, "UnifracUnweightedCommand", "printUnweightedFile");
779                 exit(1);
780         }
781 }
782
783 /***********************************************************/
784 void UnifracUnweightedCommand::printUWSummaryFile(int i) {
785         try {
786                                 
787                 //format output
788                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
789                         
790                 //print each line
791
792                 for(int a = 0; a < numComp; a++) {
793                         outSum << i+1 << '\t';
794                         m->mothurOut(toString(i+1) + "\t");
795                         
796                         if (random) {
797                                 if (UWScoreSig[a][0] > (1/(float)iters)) {
798                                         outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
799                                         cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl; 
800                                         m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0])  + "\t" + toString(UWScoreSig[a][0])+ "\n"); 
801                                 }else {
802                                         outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
803                                         cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
804                                         m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0])  + "\t<" + toString((1/float(iters))) + "\n"); 
805                                 }
806                         }else{
807                                 outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0]  << endl;
808                                 cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0]  << endl; 
809                                 m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0]) + "\n");
810                         }
811                 }
812                 
813         }
814         catch(exception& e) {
815                 m->errorOut(e, "UnifracUnweightedCommand", "printUWSummaryFile");
816                 exit(1);
817         }
818 }
819 /***********************************************************/
820 void UnifracUnweightedCommand::createPhylipFile(int i) {
821         try {
822                 string phylipFileName;
823         map<string, string> variables; 
824                 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
825         variables["[tag]"] = toString(i+1);
826                 if ((outputForm == "lt") || (outputForm == "square")) {
827             variables["[tag2]"] = "unweighted.phylip";
828                         phylipFileName = getOutputFileName("phylip",variables);
829                         outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
830                 }else { //column
831             variables["[tag2]"] = "unweighted.column";
832                         phylipFileName = getOutputFileName("column",variables);
833                         outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
834                 }
835                 
836                 ofstream out;
837                 m->openOutputFile(phylipFileName, out);
838                 
839                 if ((outputForm == "lt") || (outputForm == "square")) {
840                         //output numSeqs
841                         out << m->getNumGroups() << endl;
842                 }
843                 
844                 //make matrix with scores in it
845                 vector< vector<float> > dists;  dists.resize(m->getNumGroups());
846                 for (int i = 0; i < m->getNumGroups(); i++) {
847                         dists[i].resize(m->getNumGroups(), 0.0);
848                 }
849                 
850                 //flip it so you can print it
851                 int count = 0;
852                 for (int r=0; r<m->getNumGroups(); r++) { 
853                         for (int l = 0; l < r; l++) {
854                                 dists[r][l] = utreeScores[count][0];
855                                 dists[l][r] = utreeScores[count][0];
856                                 count++;
857                         }
858                 }
859                 
860                 //output to file
861                 for (int r=0; r<m->getNumGroups(); r++) { 
862                         //output name
863                         string name = (m->getGroups())[r];
864                         if (name.length() < 10) { //pad with spaces to make compatible
865                                 while (name.length() < 10) {  name += " ";  }
866                         }
867                         
868                         if (outputForm == "lt") {
869                                 out << name << '\t';
870                         
871                                 //output distances
872                                 for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
873                                 out << endl;
874                         }else if (outputForm == "square") {
875                                 out << name << '\t';
876                                 
877                                 //output distances
878                                 for (int l = 0; l < m->getNumGroups(); l++) {   out << dists[r][l] << '\t';  }
879                                 out << endl;
880                         }else{
881                                 //output distances
882                                 for (int l = 0; l < r; l++) {   
883                                         string otherName = (m->getGroups())[l];
884                                         if (otherName.length() < 10) { //pad with spaces to make compatible
885                                                 while (otherName.length() < 10) {  otherName += " ";  }
886                                         }
887                                         
888                                         out  << name << '\t' << otherName << dists[r][l] << endl;  
889                                 }
890                         }
891                 }
892                 out.close();
893         }
894         catch(exception& e) {
895                 m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile");
896                 exit(1);
897         }
898 }
899 /***********************************************************/
900
901
902
903