]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
1.22.0
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> UnifracWeightedCommand::setParameters(){ 
14         try {
15                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
16                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
17                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
18                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
19                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
20                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21                 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
22                 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
23                 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string UnifracWeightedCommand::getHelpString(){ 
38         try {
39                 string helpString = "";
40                 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root and random.  tree parameter is required unless you have valid current tree file.\n";
41                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
42                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
43                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
44                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
45                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
46                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
47                 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
48                 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
49                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
50                 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
51                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
52                 return helpString;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 UnifracWeightedCommand::UnifracWeightedCommand(){       
61         try {
62                 abort = true; calledHelp = true; 
63                 setParameters();
64                 vector<string> tempOutNames;
65                 outputTypes["weighted"] = tempOutNames;
66                 outputTypes["wsummary"] = tempOutNames;
67                 outputTypes["phylip"] = tempOutNames;
68                 outputTypes["column"] = tempOutNames;
69         }
70         catch(exception& e) {
71                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
72                 exit(1);
73         }
74 }
75
76 /***********************************************************/
77 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
78         try {
79                 abort = false; calledHelp = false;   
80                         
81                 //allow user to run help
82                 if(option == "help") { help(); abort = true; calledHelp = true; }
83                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
84                 
85                 else {
86                         vector<string> myArray = setParameters();
87                         
88                         OptionParser parser(option);
89                         map<string,string> parameters=parser.getParameters();
90                         map<string,string>::iterator it;
91                         
92                         ValidParameters validParameter;
93                 
94                         //check to make sure all parameters are valid for command
95                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
96                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
97                         }
98                         
99                         //initialize outputTypes
100                         vector<string> tempOutNames;
101                         outputTypes["weighted"] = tempOutNames;
102                         outputTypes["wsummary"] = tempOutNames;
103                         outputTypes["phylip"] = tempOutNames;
104                         outputTypes["column"] = tempOutNames;
105                         
106                         //if the user changes the input directory command factory will send this info to us in the output parameter 
107                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
108                         if (inputDir == "not found"){   inputDir = "";          }
109                         else {
110                                 string path;
111                                 it = parameters.find("tree");
112                                 //user has given a template file
113                                 if(it != parameters.end()){ 
114                                         path = m->hasPath(it->second);
115                                         //if the user has not given a path then, add inputdir. else leave path alone.
116                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
117                                 }
118                                 
119                                 it = parameters.find("group");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
125                                 }
126                                 
127                                 it = parameters.find("name");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
133                                 }
134                         }
135                         
136                         m->runParse = true;
137                         m->clearGroups();
138                         m->clearAllGroups();
139                         m->Treenames.clear();
140                         m->names.clear();
141                         
142                         //check for required parameters
143                         treefile = validParameter.validFile(parameters, "tree", true);
144                         if (treefile == "not open") { abort = true; }
145                         else if (treefile == "not found") {                             //if there is a current design file, use it
146                                 treefile = m->getTreeFile(); 
147                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
148                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
149                         }else { m->setTreeFile(treefile); }     
150                         
151                         //check for required parameters
152                         groupfile = validParameter.validFile(parameters, "group", true);
153                         if (groupfile == "not open") { abort = true; }
154                         else if (groupfile == "not found") { groupfile = ""; }
155                         else { m->setGroupFile(groupfile); }
156                         
157                         namefile = validParameter.validFile(parameters, "name", true);
158                         if (namefile == "not open") { abort = true; }
159                         else if (namefile == "not found") { namefile = ""; }
160                         else { m->setNameFile(namefile); }
161                         
162                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
163                         
164                                                                                                                                         
165                         //check for optional parameter and set defaults
166                         // ...at some point should added some additional type checking...
167                         groups = validParameter.validFile(parameters, "groups", false);                 
168                         if (groups == "not found") { groups = ""; }
169                         else { 
170                                 m->splitAtDash(groups, Groups);
171                                 m->setGroups(Groups);
172                         }
173                                 
174                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
175                         convert(itersString, iters); 
176                         
177                         string temp = validParameter.validFile(parameters, "distance", false);                  
178                         if (temp == "not found") { phylip = false; outputForm = ""; }
179                         else{
180                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
181                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
182                         }
183                         
184                         temp = validParameter.validFile(parameters, "random", false);                           if (temp == "not found") { temp = "F"; }
185                         random = m->isTrue(temp);
186                         
187                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
188                         includeRoot = m->isTrue(temp);
189                         
190                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
191                         m->setProcessors(temp);
192                         convert(temp, processors);
193                         
194                         if (!random) {  iters = 0;  } //turn off random calcs
195                 }
196                 
197                 
198         }
199         catch(exception& e) {
200                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
201                 exit(1);
202         }
203 }
204 /***********************************************************/
205 int UnifracWeightedCommand::execute() {
206         try {
207         
208                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
209                 
210                 m->setTreeFile(treefile);
211                 
212                 if (groupfile != "") {
213                         //read in group map info.
214                         tmap = new TreeMap(groupfile);
215                         tmap->readMap();
216                 }else{ //fake out by putting everyone in one group
217                         Tree* tree = new Tree(treefile); delete tree;  //extracts names from tree to make faked out groupmap
218                         tmap = new TreeMap();
219                         
220                         for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
221                 }
222                 
223                 if (namefile != "") { readNamesFile(); }
224                 
225                 read = new ReadNewickTree(treefile);
226                 int readOk = read->read(tmap); 
227                 
228                 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
229                 
230                 read->AssembleTrees();
231                 T = read->getTrees();
232                 delete read;
233                 
234                 //make sure all files match
235                 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
236                 int numNamesInTree;
237                 if (namefile != "")  {  
238                         if (numUniquesInName == m->Treenames.size()) {  numNamesInTree = nameMap.size();  }
239                         else {   numNamesInTree = m->Treenames.size();  }
240                 }else {  numNamesInTree = m->Treenames.size();  }
241                 
242                 
243                 //output any names that are in group file but not in tree
244                 if (numNamesInTree < tmap->getNumSeqs()) {
245                         for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
246                                 //is that name in the tree?
247                                 int count = 0;
248                                 for (int j = 0; j < m->Treenames.size(); j++) {
249                                         if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
250                                         count++;
251                                 }
252                                 
253                                 if (m->control_pressed) { 
254                                         delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
255                                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
256                                         m->clearGroups();
257                                         return 0;
258                                 }
259                                 
260                                 //then you did not find it so report it 
261                                 if (count == m->Treenames.size()) { 
262                                         //if it is in your namefile then don't remove
263                                         map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
264                                         
265                                         if (it == nameMap.end()) {
266                                                 m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
267                                                 tmap->removeSeq(tmap->namesOfSeqs[i]);
268                                                 i--; //need this because removeSeq removes name from namesOfSeqs
269                                         }
270                                 }
271                         }
272                 }
273                 
274                 sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
275                 m->openOutputFile(sumFile, outSum);
276                 outputNames.push_back(sumFile);  outputTypes["wsummary"].push_back(sumFile);
277                         
278                 util = new SharedUtil();
279                 string s; //to make work with setgroups
280                 Groups = m->getGroups();
281                 vector<string> nameGroups = tmap->getNamesOfGroups();
282                 util->setGroups(Groups, nameGroups, s, numGroups, "weighted");  //sets the groups the user wants to analyze
283                 util->getCombos(groupComb, Groups, numComp);
284                 m->setGroups(Groups);
285                 delete util;
286                 
287                 weighted = new Weighted(tmap, includeRoot);
288                         
289                 int start = time(NULL);
290                 
291                 //get weighted for users tree
292                 userData.resize(numComp,0);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
293                 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
294                 
295                 if (numComp < processors) { processors = numComp; }
296                                 
297                 //get weighted scores for users trees
298                 for (int i = 0; i < T.size(); i++) {
299                         
300                         if (m->control_pressed) { delete tmap; delete weighted;
301                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0; }
302
303                         counter = 0;
304                         rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
305                         uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
306                         
307                         if (random) {  
308                                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted", itersString);  
309                                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
310                                 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
311                         } 
312
313                         userData = weighted->getValues(T[i], processors, outputDir);  //userData[0] = weightedscore
314                         
315                         if (m->control_pressed) { delete tmap; delete weighted;
316                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } return 0; }
317                         
318                         //save users score
319                         for (int s=0; s<numComp; s++) {
320                                 //add users score to vector of user scores
321                                 uScores[s].push_back(userData[s]);
322                                 
323                                 //save users tree score for summary file
324                                 utreeScores.push_back(userData[s]);
325                         }
326                         
327                         if (random) { 
328                         
329                                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
330                                 vector< vector<string> > namesOfGroupCombos;
331                                 for (int a=0; a<numGroups; a++) { 
332                                         for (int l = 0; l < a; l++) {   
333                                                 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
334                                                 namesOfGroupCombos.push_back(groups);
335                                         }
336                                 }
337                                 
338                                 lines.clear();
339                                 
340                                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
341                                         if(processors != 1){
342                                                 int numPairs = namesOfGroupCombos.size();
343                                                 int numPairsPerProcessor = numPairs / processors;
344                                         
345                                                 for (int i = 0; i < processors; i++) {
346                                                         int startPos = i * numPairsPerProcessor;
347                                                         if(i == processors - 1){
348                                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
349                                                         }
350                                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
351                                                 }
352                                         }
353                                 #endif
354
355                                 
356                                 //get scores for random trees
357                                 for (int j = 0; j < iters; j++) {
358                                 
359                                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
360                                                 if(processors == 1){
361                                                         driver(T[i],  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
362                                                 }else{
363                                                         createProcesses(T[i],  namesOfGroupCombos, rScores);
364                                                 }
365                                         #else
366                                                 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
367                                         #endif
368                                         
369                                         if (m->control_pressed) { delete tmap; delete weighted;
370                                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } return 0; }
371                                         
372                                         //report progress
373 //                                      m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();          
374                                 }
375                                 lines.clear();
376                         
377                                 //find the signifigance of the score for summary file
378                                 for (int f = 0; f < numComp; f++) {
379                                         //sort random scores
380                                         sort(rScores[f].begin(), rScores[f].end());
381                                         
382                                         //the index of the score higher than yours is returned 
383                                         //so if you have 1000 random trees the index returned is 100 
384                                         //then there are 900 trees with a score greater then you. 
385                                         //giving you a signifigance of 0.900
386                                         int index = findIndex(userData[f], f);    if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
387                                         
388                                         //the signifigance is the number of trees with the users score or higher 
389                                         WScoreSig.push_back((iters-index)/(float)iters);
390                                 }
391                                 
392                                 //out << "Tree# " << i << endl;
393                                 calculateFreqsCumuls();
394                                 printWeightedFile();
395                                 
396                                 delete output;
397                         
398                         }
399                         
400                         //clear data
401                         rScores.clear();
402                         uScores.clear();
403                         validScores.clear();
404                 }
405                 
406                 
407                 if (m->control_pressed) { delete tmap; delete weighted;
408                         for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0;  }
409                 
410                 printWSummaryFile();
411                 
412                 if (phylip) {   createPhylipFile();             }
413
414                 //clear out users groups
415                 m->clearGroups();
416                 delete tmap; delete weighted;
417                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
418                 
419                 
420                 if (m->control_pressed) { 
421                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  }
422                         return 0; 
423                 }
424                 
425                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
426                 
427                 //set phylip file as new current phylipfile
428                 string current = "";
429                 itTypes = outputTypes.find("phylip");
430                 if (itTypes != outputTypes.end()) {
431                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
432                 }
433                 
434                 //set column file as new current columnfile
435                 itTypes = outputTypes.find("column");
436                 if (itTypes != outputTypes.end()) {
437                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
438                 }
439                 
440                 m->mothurOutEndLine();
441                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
442                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
443                 m->mothurOutEndLine();
444                 
445                 return 0;
446                 
447         }
448         catch(exception& e) {
449                 m->errorOut(e, "UnifracWeightedCommand", "execute");
450                 exit(1);
451         }
452 }
453 /**************************************************************************************************/
454
455 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
456         try {
457 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
458                 int process = 1;
459                 vector<int> processIDS;
460                 
461                 EstOutput results;
462                 
463                 //loop through and create all the processes you want
464                 while (process != processors) {
465                         int pid = fork();
466                         
467                         if (pid > 0) {
468                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
469                                 process++;
470                         }else if (pid == 0){
471                                 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
472                         
473                                 //pass numSeqs to parent
474                                 ofstream out;
475                                 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
476                                 m->openOutputFile(tempFile, out);
477                                 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t';  } out << endl;
478                                 out.close();
479                                 
480                                 exit(0);
481                         }else { 
482                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
483                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
484                                 exit(0);
485                         }
486                 }
487                 
488                 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
489                 
490                 //force parent to wait until all the processes are done
491                 for (int i=0;i<(processors-1);i++) { 
492                         int temp = processIDS[i];
493                         wait(&temp);
494                 }
495                 
496                 //get data created by processes
497                 for (int i=0;i<(processors-1);i++) { 
498         
499                         ifstream in;
500                         string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
501                         m->openInputFile(s, in);
502                         
503                         double tempScore;
504                         for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
505                         in.close();
506                         m->mothurRemove(s);
507                 }
508                 
509                 return 0;
510 #endif          
511         }
512         catch(exception& e) {
513                 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
514                 exit(1);
515         }
516 }
517
518 /**************************************************************************************************/
519 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) { 
520  try {
521                 Tree* randT = new Tree(tmap);
522
523                 for (int h = start; h < (start+num); h++) {
524         
525                         if (m->control_pressed) { return 0; }
526                 
527                         //initialize weighted score
528                         string groupA = namesOfGroupCombos[h][0]; 
529                         string groupB = namesOfGroupCombos[h][1];
530                         
531                         //copy T[i]'s info.
532                         randT->getCopy(t);
533                          
534                         //create a random tree with same topology as T[i], but different labels
535                         randT->assembleRandomUnifracTree(groupA, groupB);
536                         
537                         if (m->control_pressed) { delete randT;  return 0;  }
538
539                         //get wscore of random tree
540                         EstOutput randomData = weighted->getValues(randT, groupA, groupB);
541                 
542                         if (m->control_pressed) { delete randT;  return 0;  }
543                                                                                 
544                         //save scores
545                         scores[h].push_back(randomData[0]);
546                 }
547         
548                 delete randT;
549         
550                 return 0;
551
552         }
553         catch(exception& e) {
554                 m->errorOut(e, "UnifracWeightedCommand", "driver");
555                 exit(1);
556         }
557 }
558 /***********************************************************/
559 void UnifracWeightedCommand::printWeightedFile() {
560         try {
561                 vector<double> data;
562                 vector<string> tags;
563                 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
564                 
565                 for(int a = 0; a < numComp; a++) {
566                         output->initFile(groupComb[a], tags);
567                         //print each line
568                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
569                                 data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
570                                 output->output(data);
571                                 data.clear();
572                         } 
573                         output->resetFile();
574                 }
575         }
576         catch(exception& e) {
577                 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
578                 exit(1);
579         }
580 }
581
582
583 /***********************************************************/
584 void UnifracWeightedCommand::printWSummaryFile() {
585         try {
586                 //column headers
587                 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
588                 m->mothurOut("Tree#\tGroups\tWScore\t");
589                 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
590                 outSum << endl; m->mothurOutEndLine();
591                 
592                 //format output
593                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
594                 
595                 //print each line
596                 int count = 0;
597                 for (int i = 0; i < T.size(); i++) { 
598                         for (int j = 0; j < numComp; j++) {
599                                 if (random) {
600                                         if (WScoreSig[count] > (1/(float)iters)) {
601                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
602                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
603                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" +  toString(WScoreSig[count]) + "\n");   
604                                         }else{
605                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
606                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
607                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" +  toString((1/float(iters))) + "\n");  
608                                         }
609                                 }else{
610                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl; 
611                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count]  << endl; 
612                                         m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n"); 
613                                 }
614                                 count++;
615                         }
616                 }
617                 outSum.close();
618         }
619         catch(exception& e) {
620                 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
621                 exit(1);
622         }
623 }
624 /***********************************************************/
625 void UnifracWeightedCommand::createPhylipFile() {
626         try {
627                 int count = 0;
628                 //for each tree
629                 for (int i = 0; i < T.size(); i++) { 
630                 
631                         string phylipFileName;
632                         if ((outputForm == "lt") || (outputForm == "square")) {
633                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.phylip.dist";
634                                 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
635                         }else { //column
636                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.column.dist";
637                                 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
638                         }
639                         
640                         ofstream out;
641                         m->openOutputFile(phylipFileName, out);
642                         
643                         if ((outputForm == "lt") || (outputForm == "square")) {
644                                 //output numSeqs
645                                 out << m->getNumGroups() << endl;
646                         }
647
648                         //make matrix with scores in it
649                         vector< vector<float> > dists;  dists.resize(m->getNumGroups());
650                         for (int i = 0; i < m->getNumGroups(); i++) {
651                                 dists[i].resize(m->getNumGroups(), 0.0);
652                         }
653                         
654                         //flip it so you can print it
655                         for (int r=0; r<m->getNumGroups(); r++) { 
656                                 for (int l = 0; l < r; l++) {
657                                         dists[r][l] = utreeScores[count];
658                                         dists[l][r] = utreeScores[count];
659                                         count++;
660                                 }
661                         }
662
663                         //output to file
664                         for (int r=0; r<m->getNumGroups(); r++) { 
665                                 //output name
666                                 string name = (m->getGroups())[r];
667                                 if (name.length() < 10) { //pad with spaces to make compatible
668                                         while (name.length() < 10) {  name += " ";  }
669                                 }
670                                 
671                                 if (outputForm == "lt") {
672                                         out << name << '\t';
673                                         
674                                         //output distances
675                                         for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
676                                         out << endl;
677                                 }else if (outputForm == "square") {
678                                         out << name << '\t';
679                                         
680                                         //output distances
681                                         for (int l = 0; l < m->getNumGroups(); l++) {   out  << dists[r][l] << '\t';  }
682                                         out << endl;
683                                 }else{
684                                         //output distances
685                                         for (int l = 0; l < r; l++) {   
686                                                 string otherName = (m->getGroups())[l];
687                                                 if (otherName.length() < 10) { //pad with spaces to make compatible
688                                                         while (otherName.length() < 10) {  otherName += " ";  }
689                                                 }
690                                                 
691                                                 out  << name << '\t' << otherName << dists[r][l] << endl;  
692                                         }
693                                 }
694                         }
695                         out.close();
696                 }
697         }
698         catch(exception& e) {
699                 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
700                 exit(1);
701         }
702 }
703 /***********************************************************/
704 int UnifracWeightedCommand::findIndex(float score, int index) {
705         try{
706                 for (int i = 0; i < rScores[index].size(); i++) {
707                         if (rScores[index][i] >= score) {       return i;       }
708                 }
709                 return rScores[index].size();
710         }
711         catch(exception& e) {
712                 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
713                 exit(1);
714         }
715 }
716
717 /***********************************************************/
718
719 void UnifracWeightedCommand::calculateFreqsCumuls() {
720         try {
721                 //clear out old tree values
722                 rScoreFreq.clear();
723                 rScoreFreq.resize(numComp);
724                 rCumul.clear();
725                 rCumul.resize(numComp);
726                 validScores.clear();
727         
728                 //calculate frequency
729                 for (int f = 0; f < numComp; f++) {
730                         for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
731                                 validScores[rScores[f][i]] = rScores[f][i];
732                                 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
733                                 if (it != rScoreFreq[f].end()) {
734                                         rScoreFreq[f][rScores[f][i]]++;
735                                 }else{
736                                         rScoreFreq[f][rScores[f][i]] = 1;
737                                 }
738                         }
739                 }
740                 
741                 //calculate rcumul
742                 for(int a = 0; a < numComp; a++) {
743                         float rcumul = 1.0000;
744                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
745                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
746                                 //make rscoreFreq map and rCumul
747                                 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
748                                 rCumul[a][it->first] = rcumul;
749                                 //get percentage of random trees with that info
750                                 if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
751                                 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
752                         }
753                 }
754
755         }
756         catch(exception& e) {
757                 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
758                 exit(1);
759         }
760 }
761 /*****************************************************************/
762 int UnifracWeightedCommand::readNamesFile() {
763         try {
764                 m->names.clear();
765                 numUniquesInName = 0;
766                 
767                 ifstream in;
768                 m->openInputFile(namefile, in);
769                 
770                 string first, second;
771                 map<string, string>::iterator itNames;
772                 
773                 while(!in.eof()) {
774                         in >> first >> second; m->gobble(in);
775                         
776                         numUniquesInName++;
777                         
778                         itNames = m->names.find(first);
779                         if (itNames == m->names.end()) {  
780                                 m->names[first] = second; 
781                                 
782                                 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
783                                 vector<string> dupNames;
784                                 m->splitAtComma(second, dupNames);
785                                 
786                                 for (int i = 0; i < dupNames.size(); i++) {     
787                                         nameMap[dupNames[i]] = dupNames[i]; 
788                                         if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); } 
789                                 }
790                         }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }                  
791                 }
792                 in.close();
793                 
794                 return 0;
795         }
796         catch(exception& e) {
797                 m->errorOut(e, "UnifracWeightedCommand", "readNamesFile");
798                 exit(1);
799         }
800 }
801 /***********************************************************/
802
803
804
805
806