]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
Merge branch 'master' of https://github.com/mothur/mothur.git into development
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11 #include "consensus.h"
12 #include "subsample.h"
13 #include "treereader.h"
14
15 //**********************************************************************************************************************
16 vector<string> UnifracWeightedCommand::setParameters(){ 
17         try {
18                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
25         CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
26         CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
27                 CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(pdistance);
28                 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
29                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
31                 
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string UnifracWeightedCommand::getHelpString(){ 
43         try {
44                 string helpString = "";
45                 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root, subsample, consensus and random.  tree parameter is required unless you have valid current tree file.\n";
46                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
47                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
48                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
49                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
50                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
51                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
53         helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results, as well as a consensus tree built from these trees. Default=F.\n";
54         helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
55                 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
56                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
57                 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
58                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 string UnifracWeightedCommand::getOutputFileNameTag(string type, string inputName=""){  
68         try {
69         string outputFileName = "";
70                 map<string, vector<string> >::iterator it;
71         
72         //is this a type this command creates
73         it = outputTypes.find(type);
74         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
75         else {
76             if (type == "weighted")            {   outputFileName =  "weighted";   }
77             else if (type == "wsummary")        {   outputFileName =  "wsummary";   }
78             else if (type == "phylip")           {   outputFileName =  "dist";   }
79             else if (type == "column")           {   outputFileName =  "dist";   }
80             else if (type == "tree")             {   outputFileName =  "tre";   }
81             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
82         }
83         return outputFileName;
84         }
85         catch(exception& e) {
86                 m->errorOut(e, "UnifracWeightedCommand", "getOutputFileNameTag");
87                 exit(1);
88         }
89 }
90 //**********************************************************************************************************************
91 UnifracWeightedCommand::UnifracWeightedCommand(){       
92         try {
93                 abort = true; calledHelp = true; 
94                 setParameters();
95                 vector<string> tempOutNames;
96                 outputTypes["weighted"] = tempOutNames;
97                 outputTypes["wsummary"] = tempOutNames;
98                 outputTypes["phylip"] = tempOutNames;
99                 outputTypes["column"] = tempOutNames;
100         outputTypes["tree"] = tempOutNames;
101         }
102         catch(exception& e) {
103                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
104                 exit(1);
105         }
106 }
107
108 /***********************************************************/
109 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
110         try {
111                 abort = false; calledHelp = false;   
112                         
113                 //allow user to run help
114                 if(option == "help") { help(); abort = true; calledHelp = true; }
115                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
116                 
117                 else {
118                         vector<string> myArray = setParameters();
119                         
120                         OptionParser parser(option);
121                         map<string,string> parameters=parser.getParameters();
122                         map<string,string>::iterator it;
123                         
124                         ValidParameters validParameter;
125                 
126                         //check to make sure all parameters are valid for command
127                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
128                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
129                         }
130                         
131                         //initialize outputTypes
132                         vector<string> tempOutNames;
133                         outputTypes["weighted"] = tempOutNames;
134                         outputTypes["wsummary"] = tempOutNames;
135                         outputTypes["phylip"] = tempOutNames;
136                         outputTypes["column"] = tempOutNames;
137             outputTypes["tree"] = tempOutNames;
138                         
139                         //if the user changes the input directory command factory will send this info to us in the output parameter 
140                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
141                         if (inputDir == "not found"){   inputDir = "";          }
142                         else {
143                                 string path;
144                                 it = parameters.find("tree");
145                                 //user has given a template file
146                                 if(it != parameters.end()){ 
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
150                                 }
151                                 
152                                 it = parameters.find("group");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
158                                 }
159                                 
160                                 it = parameters.find("name");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
166                                 }
167                         }
168                         
169                         //check for required parameters
170                         treefile = validParameter.validFile(parameters, "tree", true);
171                         if (treefile == "not open") { treefile = ""; abort = true; }
172                         else if (treefile == "not found") {                             //if there is a current design file, use it
173                                 treefile = m->getTreeFile(); 
174                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
175                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
176                         }else { m->setTreeFile(treefile); }     
177                         
178                         //check for required parameters
179                         groupfile = validParameter.validFile(parameters, "group", true);
180                         if (groupfile == "not open") { abort = true; }
181                         else if (groupfile == "not found") { groupfile = ""; }
182                         else { m->setGroupFile(groupfile); }
183                         
184                         namefile = validParameter.validFile(parameters, "name", true);
185                         if (namefile == "not open") { namefile = ""; abort = true; }
186                         else if (namefile == "not found") { namefile = ""; }
187                         else { m->setNameFile(namefile); }
188                         
189                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
190                         
191                                                                                                                                         
192                         //check for optional parameter and set defaults
193                         // ...at some point should added some additional type checking...
194                         groups = validParameter.validFile(parameters, "groups", false);                 
195                         if (groups == "not found") { groups = ""; }
196                         else { 
197                                 m->splitAtDash(groups, Groups);
198                                 m->setGroups(Groups);
199                         }
200                                 
201                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
202                         m->mothurConvert(itersString, iters); 
203                         
204                         string temp = validParameter.validFile(parameters, "distance", false);                  
205                         if (temp == "not found") { phylip = false; outputForm = ""; }
206                         else{
207                 if (temp=="phylip") { temp = "lt"; }
208                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
209                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
210                         }
211                         
212                         temp = validParameter.validFile(parameters, "random", false);                           if (temp == "not found") { temp = "F"; }
213                         random = m->isTrue(temp);
214                         
215                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
216                         includeRoot = m->isTrue(temp);
217                         
218                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
219                         m->setProcessors(temp);
220                         m->mothurConvert(temp, processors);
221             
222             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
223                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
224             else {  
225                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
226                 else { subsample = false; }
227             }
228                         
229             if (!subsample) { subsampleIters = 0;   }
230             else { subsampleIters = iters;          }
231             
232             temp = validParameter.validFile(parameters, "consensus", false);                                    if (temp == "not found") { temp = "F"; }
233                         consensus = m->isTrue(temp);
234             
235                         if (subsample && random) {  m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true;  } 
236                         if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } 
237             if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
238             if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
239             
240                         if (namefile == "") {
241                                 vector<string> files; files.push_back(treefile);
242                                 parser.getNameFile(files);
243                         }
244                 }
245                 
246                 
247         }
248         catch(exception& e) {
249                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
250                 exit(1);
251         }
252 }
253 /***********************************************************/
254 int UnifracWeightedCommand::execute() {
255         try {
256         
257                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
258                 
259                 m->setTreeFile(treefile);
260                 
261         TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
262         T = reader->getTrees();
263         tmap = T[0]->getTreeMap();
264         map<string, string> nameMap = reader->getNames();
265         map<string, string> unique2Dup = reader->getNameMap();
266         delete reader;
267     
268         if (m->control_pressed) {  delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
269                                 
270                 sumFile = outputDir + m->getSimpleName(treefile) + getOutputFileNameTag("wsummary");
271                 m->openOutputFile(sumFile, outSum);
272                 outputNames.push_back(sumFile);  outputTypes["wsummary"].push_back(sumFile);
273                 
274         SharedUtil util;
275                 string s; //to make work with setgroups
276                 Groups = m->getGroups();
277                 vector<string> nameGroups = tmap->getNamesOfGroups();
278                 util.setGroups(Groups, nameGroups, s, numGroups, "weighted");   //sets the groups the user wants to analyze
279                 m->setGroups(Groups);
280                 
281         if (m->control_pressed) {  delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
282         
283                 Weighted weighted(includeRoot);
284                         
285                 int start = time(NULL);
286             
287         //set or check size
288         if (subsample) {
289             //user has not set size, set size = smallest samples size
290             if (subsampleSize == -1) { 
291                 vector<string> temp; temp.push_back(Groups[0]);
292                 subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
293                 for (int i = 1; i < Groups.size(); i++) {
294                     temp.clear(); temp.push_back(Groups[i]);
295                     int thisSize = (tmap->getNamesSeqs(temp)).size();
296                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
297                 }
298                 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
299             }else { //eliminate any too small groups
300                 vector<string> newGroups = Groups;
301                 Groups.clear();
302                 for (int i = 0; i < newGroups.size(); i++) {
303                     vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
304                     vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
305                     int thisSize = thisGroupsSeqs.size();
306                     
307                     if (thisSize >= subsampleSize) {    Groups.push_back(newGroups[i]); }
308                     else {  m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
309                 } 
310                 m->setGroups(Groups);
311             }
312         }
313         
314         //here in case some groups are removed by subsample
315         util.getCombos(groupComb, Groups, numComp);
316         
317         if (numComp < processors) { processors = numComp; }
318         
319         if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
320         
321         //get weighted scores for users trees
322         for (int i = 0; i < T.size(); i++) {
323             
324             if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
325             
326             counter = 0;
327             rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
328             uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
329             
330             vector<double> userData; userData.resize(numComp,0);  //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
331             vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
332             
333             if (random) {  
334                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("weighted"), itersString);  
335                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("weighted"));
336                 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("weighted"));
337             } 
338             
339             userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
340             if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]);  } return 0; }
341             
342             //save users score
343             for (int s=0; s<numComp; s++) {
344                 //add users score to vector of user scores
345                 uScores[s].push_back(userData[s]);
346                 //save users tree score for summary file
347                 utreeScores.push_back(userData[s]);
348             }
349             
350             if (random) {  runRandomCalcs(T[i], userData); }
351             
352             //clear data
353             rScores.clear();
354             uScores.clear();
355             validScores.clear();
356             
357             //subsample loop
358             vector< vector<double> > calcDistsTotals;  //each iter, each groupCombos dists. this will be used to make .dist files
359             for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
360                 
361                 if (m->control_pressed) { break; }
362                 
363                 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
364                 TreeMap* newTmap = new TreeMap();
365                 //newTmap->getCopy(*tmap);
366                 
367                 //SubSample sample;
368                //Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
369                 
370                 //uses method of setting groups to doNotIncludeMe
371                 SubSample sample;
372                 Tree* subSampleTree = sample.getSample(T[i], tmap, newTmap, subsampleSize, unique2Dup);
373
374                 //call new weighted function
375                 vector<double> iterData; iterData.resize(numComp,0);
376                 Weighted thisWeighted(includeRoot);
377                 iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
378                 
379                 //save data to make ave dist, std dist
380                 calcDistsTotals.push_back(iterData);
381                 
382                 delete newTmap;
383                 delete subSampleTree;
384                 
385                 if((thisIter+1) % 100 == 0){    m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine();              }
386             }
387             
388             if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]);  } return 0; }
389             
390             if (subsample) {  getAverageSTDMatrices(calcDistsTotals, i); }
391             if (consensus) {  getConsensusTrees(calcDistsTotals, i);  }
392         }
393         
394                 
395                 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } return 0;  }
396                 
397         if (phylip) {   createPhylipFile();             }
398     
399                 printWSummaryFile();
400                 
401                 //clear out users groups
402                 m->clearGroups();
403                 delete tmap; 
404                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
405                 
406                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
407                 
408                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
409                 
410                 //set phylip file as new current phylipfile
411                 string current = "";
412                 itTypes = outputTypes.find("phylip");
413                 if (itTypes != outputTypes.end()) {
414                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
415                 }
416                 
417                 //set column file as new current columnfile
418                 itTypes = outputTypes.find("column");
419                 if (itTypes != outputTypes.end()) {
420                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
421                 }
422                 
423                 m->mothurOutEndLine();
424                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
425                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
426                 m->mothurOutEndLine();
427                 
428                 return 0;
429                 
430         }
431         catch(exception& e) {
432                 m->errorOut(e, "UnifracWeightedCommand", "execute");
433                 exit(1);
434         }
435 }
436 /**************************************************************************************************/
437 int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
438         try {
439         //we need to find the average distance and standard deviation for each groups distance
440         
441         //finds sum
442         vector<double> averages; averages.resize(numComp, 0); 
443         for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
444             for (int i = 0; i < dists[thisIter].size(); i++) {  
445                 averages[i] += dists[thisIter][i];
446             }
447         }
448         
449         //finds average.
450         for (int i = 0; i < averages.size(); i++) {  averages[i] /= (float) subsampleIters; }
451         
452         //find standard deviation
453         vector<double> stdDev; stdDev.resize(numComp, 0);
454                 
455         for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
456             for (int j = 0; j < dists[thisIter].size(); j++) {
457                 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
458             }
459         }
460         for (int i = 0; i < stdDev.size(); i++) {  
461             stdDev[i] /= (float) subsampleIters; 
462             stdDev[i] = sqrt(stdDev[i]);
463         }
464         
465         //make matrix with scores in it
466         vector< vector<double> > avedists;      avedists.resize(m->getNumGroups());
467         for (int i = 0; i < m->getNumGroups(); i++) {
468             avedists[i].resize(m->getNumGroups(), 0.0);
469         }
470         
471         //make matrix with scores in it
472         vector< vector<double> > stddists;      stddists.resize(m->getNumGroups());
473         for (int i = 0; i < m->getNumGroups(); i++) {
474             stddists[i].resize(m->getNumGroups(), 0.0);
475         }
476         
477         //flip it so you can print it
478         int count = 0;
479         for (int r=0; r<m->getNumGroups(); r++) { 
480             for (int l = 0; l < r; l++) {
481                 avedists[r][l] = averages[count];
482                 avedists[l][r] = averages[count];
483                 stddists[r][l] = stdDev[count];
484                 stddists[l][r] = stdDev[count];
485                 count++;
486             }
487         }
488         
489         string aveFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".weighted.ave." + getOutputFileNameTag("phylip");
490         if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);  }
491         else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName);  }
492         ofstream out;
493         m->openOutputFile(aveFileName, out);
494         
495         string stdFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".weighted.std." + getOutputFileNameTag("phylip");
496         if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
497         else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }        
498         ofstream outStd;
499         m->openOutputFile(stdFileName, outStd);
500         
501         if ((outputForm == "lt") || (outputForm == "square")) {
502             //output numSeqs
503             out << m->getNumGroups() << endl;
504             outStd << m->getNumGroups() << endl;
505         }
506         
507         //output to file
508         for (int r=0; r<m->getNumGroups(); r++) { 
509             //output name
510             string name = (m->getGroups())[r];
511             if (name.length() < 10) { //pad with spaces to make compatible
512                 while (name.length() < 10) {  name += " ";  }
513             }
514             
515             if (outputForm == "lt") {
516                 out << name << '\t';
517                 outStd << name << '\t';
518                 
519                 //output distances
520                 for (int l = 0; l < r; l++) {   out  << avedists[r][l] << '\t';  outStd  << stddists[r][l] << '\t';}
521                 out << endl;  outStd << endl;
522             }else if (outputForm == "square") {
523                 out << name << '\t';
524                 outStd << name << '\t';
525                 
526                 //output distances
527                 for (int l = 0; l < m->getNumGroups(); l++) {   out  << avedists[r][l] << '\t'; outStd  << stddists[r][l] << '\t'; }
528                 out << endl; outStd << endl;
529             }else{
530                 //output distances
531                 for (int l = 0; l < r; l++) {   
532                     string otherName = (m->getGroups())[l];
533                     if (otherName.length() < 10) { //pad with spaces to make compatible
534                         while (otherName.length() < 10) {  otherName += " ";  }
535                     }
536                     
537                     out  << name << '\t' << otherName << avedists[r][l] << endl;  
538                     outStd  << name << '\t' << otherName << stddists[r][l] << endl; 
539                 }
540             }
541         }
542         out.close();
543         outStd.close();
544         
545         return 0;
546     }
547         catch(exception& e) {
548                 m->errorOut(e, "UnifracWeightedCommand", "getAverageSTDMatrices");
549                 exit(1);
550         }
551 }
552
553 /**************************************************************************************************/
554 int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
555         try {
556         
557         //used in tree constructor 
558         m->runParse = false;
559         
560         //create treemap class from groupmap for tree class to use
561         TreeMap newTmap;
562         newTmap.makeSim(m->getGroups());
563         
564         //clear  old tree names if any
565         m->Treenames.clear();
566         
567         //fills globaldatas tree names
568         m->Treenames = m->getGroups();
569         
570         vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
571         
572         if (m->control_pressed) { return 0; }
573         
574         Consensus con;
575         Tree* conTree = con.getTree(newTrees);
576         
577         //create a new filename
578         string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons." + getOutputFileNameTag("tree");                               
579         outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
580         ofstream outTree;
581         m->openOutputFile(conFile, outTree);
582         
583         if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
584         outTree.close();
585         
586         return 0;
587
588     }
589         catch(exception& e) {
590                 m->errorOut(e, "UnifracWeightedCommand", "getConsensusTrees");
591                 exit(1);
592         }
593 }
594 /**************************************************************************************************/
595
596 vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
597         try {
598         
599         vector<Tree*> trees;
600         
601         //create a new filename
602         string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.all." + getOutputFileNameTag("tree");                             
603         outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
604         
605         ofstream outAll;
606         m->openOutputFile(outputFile, outAll);
607         
608
609         for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
610             
611             if (m->control_pressed) { break; }
612             
613             //make matrix with scores in it
614             vector< vector<double> > sims;      sims.resize(m->getNumGroups());
615             for (int j = 0; j < m->getNumGroups(); j++) {
616                 sims[j].resize(m->getNumGroups(), 0.0);
617             }
618             
619             int count = 0;
620                         for (int r=0; r<m->getNumGroups(); r++) { 
621                                 for (int l = 0; l < r; l++) {
622                     double sim = -(dists[i][count]-1.0);
623                                         sims[r][l] = sim;
624                                         sims[l][r] = sim;
625                                         count++;
626                                 }
627                         }
628
629             //create tree
630             Tree* tempTree = new Tree(&mytmap, sims);
631             map<string, string> empty;
632             tempTree->assembleTree(empty);
633             
634             trees.push_back(tempTree);
635             
636             //print tree
637             tempTree->print(outAll);
638         }
639         
640         outAll.close();
641         
642         if (m->control_pressed) {  for (int i = 0; i < trees.size(); i++) {  delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
643         
644         return trees;
645     }
646         catch(exception& e) {
647                 m->errorOut(e, "UnifracWeightedCommand", "buildTrees");
648                 exit(1);
649         }
650 }
651 /**************************************************************************************************/
652
653 int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
654         try {
655         
656         //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
657         vector< vector<string> > namesOfGroupCombos;
658         for (int a=0; a<numGroups; a++) { 
659             for (int l = 0; l < a; l++) {       
660                 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
661                 namesOfGroupCombos.push_back(groups);
662             }
663         }
664         
665         lines.clear();
666         
667 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
668         if(processors != 1){
669             int numPairs = namesOfGroupCombos.size();
670             int numPairsPerProcessor = numPairs / processors;
671             
672             for (int i = 0; i < processors; i++) {
673                 int startPos = i * numPairsPerProcessor;
674                 if(i == processors - 1){
675                     numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
676                 }
677                 lines.push_back(linePair(startPos, numPairsPerProcessor));
678             }
679         }
680 #endif
681         
682         
683         //get scores for random trees
684         for (int j = 0; j < iters; j++) {
685             
686 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
687             if(processors == 1){
688                 driver(thisTree,  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
689             }else{
690                 createProcesses(thisTree,  namesOfGroupCombos, rScores);
691             }
692 #else
693             driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
694 #endif
695             
696             if (m->control_pressed) { delete tmap;  for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
697             
698             //report progress
699             //                                  m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();          
700         }
701         lines.clear();
702         
703         //find the signifigance of the score for summary file
704         for (int f = 0; f < numComp; f++) {
705             //sort random scores
706             sort(rScores[f].begin(), rScores[f].end());
707             
708             //the index of the score higher than yours is returned 
709             //so if you have 1000 random trees the index returned is 100 
710             //then there are 900 trees with a score greater then you. 
711             //giving you a signifigance of 0.900
712             int index = findIndex(usersScores[f], f);    if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
713             
714             //the signifigance is the number of trees with the users score or higher 
715             WScoreSig.push_back((iters-index)/(float)iters);
716         }
717         
718         //out << "Tree# " << i << endl;
719         calculateFreqsCumuls();
720         printWeightedFile();
721         
722         delete output;
723         
724         return 0;
725     }
726         catch(exception& e) {
727                 m->errorOut(e, "UnifracWeightedCommand", "runRandomCalcs");
728                 exit(1);
729         }
730 }
731 /**************************************************************************************************/
732
733 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
734         try {
735 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
736                 int process = 1;
737                 vector<int> processIDS;
738                 
739                 EstOutput results;
740                 
741                 //loop through and create all the processes you want
742                 while (process != processors) {
743                         int pid = fork();
744                         
745                         if (pid > 0) {
746                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
747                                 process++;
748                         }else if (pid == 0){
749                                 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
750                         
751                                 //pass numSeqs to parent
752                                 ofstream out;
753                                 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
754                                 m->openOutputFile(tempFile, out);
755                                 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t';  } out << endl;
756                                 out.close();
757                                 
758                                 exit(0);
759                         }else { 
760                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
761                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
762                                 exit(0);
763                         }
764                 }
765                 
766                 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
767                 
768                 //force parent to wait until all the processes are done
769                 for (int i=0;i<(processors-1);i++) { 
770                         int temp = processIDS[i];
771                         wait(&temp);
772                 }
773                 
774                 //get data created by processes
775                 for (int i=0;i<(processors-1);i++) { 
776         
777                         ifstream in;
778                         string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
779                         m->openInputFile(s, in);
780                         
781                         double tempScore;
782                         for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
783                         in.close();
784                         m->mothurRemove(s);
785                 }
786                 
787                 return 0;
788 #endif          
789         }
790         catch(exception& e) {
791                 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
792                 exit(1);
793         }
794 }
795
796 /**************************************************************************************************/
797 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) { 
798  try {
799                 Tree* randT = new Tree(tmap);
800      
801         Weighted weighted(includeRoot);
802      
803                 for (int h = start; h < (start+num); h++) {
804         
805                         if (m->control_pressed) { return 0; }
806                 
807                         //initialize weighted score
808                         string groupA = namesOfGroupCombos[h][0]; 
809                         string groupB = namesOfGroupCombos[h][1];
810                         
811                         //copy T[i]'s info.
812                         randT->getCopy(t);
813                          
814                         //create a random tree with same topology as T[i], but different labels
815                         randT->assembleRandomUnifracTree(groupA, groupB);
816                         
817                         if (m->control_pressed) { delete randT;  return 0;  }
818
819                         //get wscore of random tree
820                         EstOutput randomData = weighted.getValues(randT, groupA, groupB);
821                 
822                         if (m->control_pressed) { delete randT;  return 0;  }
823                                                                                 
824                         //save scores
825                         scores[h].push_back(randomData[0]);
826                 }
827         
828                 delete randT;
829         
830                 return 0;
831
832         }
833         catch(exception& e) {
834                 m->errorOut(e, "UnifracWeightedCommand", "driver");
835                 exit(1);
836         }
837 }
838 /***********************************************************/
839 void UnifracWeightedCommand::printWeightedFile() {
840         try {
841                 vector<double> data;
842                 vector<string> tags;
843                 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
844                 
845                 for(int a = 0; a < numComp; a++) {
846                         output->initFile(groupComb[a], tags);
847                         //print each line
848                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
849                                 data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
850                                 output->output(data);
851                                 data.clear();
852                         } 
853                         output->resetFile();
854                 }
855         }
856         catch(exception& e) {
857                 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
858                 exit(1);
859         }
860 }
861
862
863 /***********************************************************/
864 void UnifracWeightedCommand::printWSummaryFile() {
865         try {
866                 //column headers
867                 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
868                 m->mothurOut("Tree#\tGroups\tWScore\t");
869                 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
870                 outSum << endl; m->mothurOutEndLine();
871                 
872                 //format output
873                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
874                 
875                 //print each line
876                 int count = 0;
877                 for (int i = 0; i < T.size(); i++) { 
878                         for (int j = 0; j < numComp; j++) {
879                                 if (random) {
880                                         if (WScoreSig[count] > (1/(float)iters)) {
881                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
882                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
883                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" +  toString(WScoreSig[count]) + "\n");   
884                                         }else{
885                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
886                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
887                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" +  toString((1/float(iters))) + "\n");  
888                                         }
889                                 }else{
890                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl; 
891                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count]  << endl; 
892                                         m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n"); 
893                                 }
894                                 count++;
895                         }
896                 }
897                 outSum.close();
898         }
899         catch(exception& e) {
900                 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
901                 exit(1);
902         }
903 }
904 /***********************************************************/
905 void UnifracWeightedCommand::createPhylipFile() {
906         try {
907                 int count = 0;
908                 //for each tree
909                 for (int i = 0; i < T.size(); i++) { 
910                 
911                         string phylipFileName;
912                         if ((outputForm == "lt") || (outputForm == "square")) {
913                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.phylip." + getOutputFileNameTag("phylip");
914                                 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
915                         }else { //column
916                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.column." + getOutputFileNameTag("column");
917                                 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
918                         }
919                         
920                         ofstream out;
921                         m->openOutputFile(phylipFileName, out);
922                         
923                         if ((outputForm == "lt") || (outputForm == "square")) {
924                                 //output numSeqs
925                                 out << m->getNumGroups() << endl;
926                         }
927
928                         //make matrix with scores in it
929                         vector< vector<float> > dists;  dists.resize(m->getNumGroups());
930                         for (int i = 0; i < m->getNumGroups(); i++) {
931                                 dists[i].resize(m->getNumGroups(), 0.0);
932                         }
933                         
934                         //flip it so you can print it
935                         for (int r=0; r<m->getNumGroups(); r++) { 
936                                 for (int l = 0; l < r; l++) {
937                                         dists[r][l] = utreeScores[count];
938                                         dists[l][r] = utreeScores[count];
939                                         count++;
940                                 }
941                         }
942
943                         //output to file
944                         for (int r=0; r<m->getNumGroups(); r++) { 
945                                 //output name
946                                 string name = (m->getGroups())[r];
947                                 if (name.length() < 10) { //pad with spaces to make compatible
948                                         while (name.length() < 10) {  name += " ";  }
949                                 }
950                                 
951                                 if (outputForm == "lt") {
952                                         out << name << '\t';
953                                         
954                                         //output distances
955                                         for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
956                                         out << endl;
957                                 }else if (outputForm == "square") {
958                                         out << name << '\t';
959                                         
960                                         //output distances
961                                         for (int l = 0; l < m->getNumGroups(); l++) {   out  << dists[r][l] << '\t';  }
962                                         out << endl;
963                                 }else{
964                                         //output distances
965                                         for (int l = 0; l < r; l++) {   
966                                                 string otherName = (m->getGroups())[l];
967                                                 if (otherName.length() < 10) { //pad with spaces to make compatible
968                                                         while (otherName.length() < 10) {  otherName += " ";  }
969                                                 }
970                                                 
971                                                 out  << name << '\t' << otherName << dists[r][l] << endl;  
972                                         }
973                                 }
974                         }
975                         out.close();
976                 }
977         }
978         catch(exception& e) {
979                 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
980                 exit(1);
981         }
982 }
983 /***********************************************************/
984 int UnifracWeightedCommand::findIndex(float score, int index) {
985         try{
986                 for (int i = 0; i < rScores[index].size(); i++) {
987                         if (rScores[index][i] >= score) {       return i;       }
988                 }
989                 return rScores[index].size();
990         }
991         catch(exception& e) {
992                 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
993                 exit(1);
994         }
995 }
996
997 /***********************************************************/
998
999 void UnifracWeightedCommand::calculateFreqsCumuls() {
1000         try {
1001                 //clear out old tree values
1002                 rScoreFreq.clear();
1003                 rScoreFreq.resize(numComp);
1004                 rCumul.clear();
1005                 rCumul.resize(numComp);
1006                 validScores.clear();
1007         
1008                 //calculate frequency
1009                 for (int f = 0; f < numComp; f++) {
1010                         for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
1011                                 validScores[rScores[f][i]] = rScores[f][i];
1012                                 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
1013                                 if (it != rScoreFreq[f].end()) {
1014                                         rScoreFreq[f][rScores[f][i]]++;
1015                                 }else{
1016                                         rScoreFreq[f][rScores[f][i]] = 1;
1017                                 }
1018                         }
1019                 }
1020                 
1021                 //calculate rcumul
1022                 for(int a = 0; a < numComp; a++) {
1023                         float rcumul = 1.0000;
1024                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
1025                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
1026                                 //make rscoreFreq map and rCumul
1027                                 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
1028                                 rCumul[a][it->first] = rcumul;
1029                                 //get percentage of random trees with that info
1030                                 if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
1031                                 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
1032                         }
1033                 }
1034
1035         }
1036         catch(exception& e) {
1037                 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
1038                 exit(1);
1039         }
1040 }
1041 /***********************************************************/
1042
1043
1044
1045
1046