]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
b3a54c929ace3221fe3ba736a3b5fad7f87818f9
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> UnifracWeightedCommand::setParameters(){ 
14         try {
15                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
16                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
17                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
18                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
19                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
20                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21                 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
22                 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
23                 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string UnifracWeightedCommand::getHelpString(){ 
38         try {
39                 string helpString = "";
40                 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root and random.  tree parameter is required unless you have valid current tree file.\n";
41                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
42                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
43                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
44                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
45                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
46                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
47                 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
48                 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
49                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
50                 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
51                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
52                 return helpString;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 UnifracWeightedCommand::UnifracWeightedCommand(){       
61         try {
62                 abort = true; calledHelp = true; 
63                 setParameters();
64                 vector<string> tempOutNames;
65                 outputTypes["weighted"] = tempOutNames;
66                 outputTypes["wsummary"] = tempOutNames;
67                 outputTypes["phylip"] = tempOutNames;
68                 outputTypes["column"] = tempOutNames;
69         }
70         catch(exception& e) {
71                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
72                 exit(1);
73         }
74 }
75
76 /***********************************************************/
77 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
78         try {
79                 abort = false; calledHelp = false;   
80                         
81                 //allow user to run help
82                 if(option == "help") { help(); abort = true; calledHelp = true; }
83                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
84                 
85                 else {
86                         vector<string> myArray = setParameters();
87                         
88                         OptionParser parser(option);
89                         map<string,string> parameters=parser.getParameters();
90                         map<string,string>::iterator it;
91                         
92                         ValidParameters validParameter;
93                 
94                         //check to make sure all parameters are valid for command
95                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
96                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
97                         }
98                         
99                         //initialize outputTypes
100                         vector<string> tempOutNames;
101                         outputTypes["weighted"] = tempOutNames;
102                         outputTypes["wsummary"] = tempOutNames;
103                         outputTypes["phylip"] = tempOutNames;
104                         outputTypes["column"] = tempOutNames;
105                         
106                         //if the user changes the input directory command factory will send this info to us in the output parameter 
107                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
108                         if (inputDir == "not found"){   inputDir = "";          }
109                         else {
110                                 string path;
111                                 it = parameters.find("tree");
112                                 //user has given a template file
113                                 if(it != parameters.end()){ 
114                                         path = m->hasPath(it->second);
115                                         //if the user has not given a path then, add inputdir. else leave path alone.
116                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
117                                 }
118                                 
119                                 it = parameters.find("group");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
125                                 }
126                                 
127                                 it = parameters.find("name");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
133                                 }
134                         }
135                         
136                         m->runParse = true;
137                         m->clearGroups();
138                         m->clearAllGroups();
139                         m->Treenames.clear();
140                         m->names.clear();
141                         
142                         //check for required parameters
143                         treefile = validParameter.validFile(parameters, "tree", true);
144                         if (treefile == "not open") { treefile = ""; abort = true; }
145                         else if (treefile == "not found") {                             //if there is a current design file, use it
146                                 treefile = m->getTreeFile(); 
147                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
148                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
149                         }else { m->setTreeFile(treefile); }     
150                         
151                         //check for required parameters
152                         groupfile = validParameter.validFile(parameters, "group", true);
153                         if (groupfile == "not open") { abort = true; }
154                         else if (groupfile == "not found") { groupfile = ""; }
155                         else { m->setGroupFile(groupfile); }
156                         
157                         namefile = validParameter.validFile(parameters, "name", true);
158                         if (namefile == "not open") { namefile = ""; abort = true; }
159                         else if (namefile == "not found") { namefile = ""; }
160                         else { m->setNameFile(namefile); }
161                         
162                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
163                         
164                                                                                                                                         
165                         //check for optional parameter and set defaults
166                         // ...at some point should added some additional type checking...
167                         groups = validParameter.validFile(parameters, "groups", false);                 
168                         if (groups == "not found") { groups = ""; }
169                         else { 
170                                 m->splitAtDash(groups, Groups);
171                                 m->setGroups(Groups);
172                         }
173                                 
174                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
175                         m->mothurConvert(itersString, iters); 
176                         
177                         string temp = validParameter.validFile(parameters, "distance", false);                  
178                         if (temp == "not found") { phylip = false; outputForm = ""; }
179                         else{
180                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
181                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
182                         }
183                         
184                         temp = validParameter.validFile(parameters, "random", false);                           if (temp == "not found") { temp = "F"; }
185                         random = m->isTrue(temp);
186                         
187                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
188                         includeRoot = m->isTrue(temp);
189                         
190                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
191                         m->setProcessors(temp);
192                         m->mothurConvert(temp, processors);
193                         
194                         if (!random) {  iters = 0;  } //turn off random calcs
195                         
196                         if (namefile == "") {
197                                 vector<string> files; files.push_back(treefile);
198                                 parser.getNameFile(files);
199                         }
200                 }
201                 
202                 
203         }
204         catch(exception& e) {
205                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
206                 exit(1);
207         }
208 }
209 /***********************************************************/
210 int UnifracWeightedCommand::execute() {
211         try {
212         
213                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
214                 
215                 m->setTreeFile(treefile);
216                 
217                 if (groupfile != "") {
218                         //read in group map info.
219                         tmap = new TreeMap(groupfile);
220                         tmap->readMap();
221                 }else{ //fake out by putting everyone in one group
222                         Tree* tree = new Tree(treefile); delete tree;  //extracts names from tree to make faked out groupmap
223                         tmap = new TreeMap();
224                         
225                         for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
226                 }
227                 
228                 if (namefile != "") { readNamesFile(); }
229                 
230                 read = new ReadNewickTree(treefile);
231                 int readOk = read->read(tmap); 
232                 
233                 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
234                 
235                 read->AssembleTrees();
236                 T = read->getTrees();
237                 delete read;
238                 
239                 //make sure all files match
240                 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
241                 int numNamesInTree;
242                 if (namefile != "")  {  
243                         if (numUniquesInName == m->Treenames.size()) {  numNamesInTree = nameMap.size();  }
244                         else {   numNamesInTree = m->Treenames.size();  }
245                 }else {  numNamesInTree = m->Treenames.size();  }
246                 
247                 
248                 //output any names that are in group file but not in tree
249                 if (numNamesInTree < tmap->getNumSeqs()) {
250                         for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
251                                 //is that name in the tree?
252                                 int count = 0;
253                                 for (int j = 0; j < m->Treenames.size(); j++) {
254                                         if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
255                                         count++;
256                                 }
257                                 
258                                 if (m->control_pressed) { 
259                                         delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
260                                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
261                                         m->clearGroups();
262                                         return 0;
263                                 }
264                                 
265                                 //then you did not find it so report it 
266                                 if (count == m->Treenames.size()) { 
267                                         //if it is in your namefile then don't remove
268                                         map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
269                                         
270                                         if (it == nameMap.end()) {
271                                                 m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
272                                                 tmap->removeSeq(tmap->namesOfSeqs[i]);
273                                                 i--; //need this because removeSeq removes name from namesOfSeqs
274                                         }
275                                 }
276                         }
277                 }
278                 
279                 sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
280                 m->openOutputFile(sumFile, outSum);
281                 outputNames.push_back(sumFile);  outputTypes["wsummary"].push_back(sumFile);
282                         
283                 util = new SharedUtil();
284                 string s; //to make work with setgroups
285                 Groups = m->getGroups();
286                 vector<string> nameGroups = tmap->getNamesOfGroups();
287                 util->setGroups(Groups, nameGroups, s, numGroups, "weighted");  //sets the groups the user wants to analyze
288                 util->getCombos(groupComb, Groups, numComp);
289                 m->setGroups(Groups);
290                 delete util;
291                 
292                 weighted = new Weighted(tmap, includeRoot);
293                         
294                 int start = time(NULL);
295                 
296                 //get weighted for users tree
297                 userData.resize(numComp,0);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
298                 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
299                 
300                 if (numComp < processors) { processors = numComp; }
301                                 
302                 //get weighted scores for users trees
303                 for (int i = 0; i < T.size(); i++) {
304                         
305                         if (m->control_pressed) { delete tmap; delete weighted;
306                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0; }
307
308                         counter = 0;
309                         rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
310                         uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
311                         
312                         if (random) {  
313                                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted", itersString);  
314                                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
315                                 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
316                         } 
317
318                         userData = weighted->getValues(T[i], processors, outputDir);  //userData[0] = weightedscore
319                         
320                         if (m->control_pressed) { delete tmap; delete weighted;
321                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } return 0; }
322                         
323                         //save users score
324                         for (int s=0; s<numComp; s++) {
325                                 //add users score to vector of user scores
326                                 uScores[s].push_back(userData[s]);
327                                 
328                                 //save users tree score for summary file
329                                 utreeScores.push_back(userData[s]);
330                         }
331                         
332                         if (random) { 
333                         
334                                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
335                                 vector< vector<string> > namesOfGroupCombos;
336                                 for (int a=0; a<numGroups; a++) { 
337                                         for (int l = 0; l < a; l++) {   
338                                                 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
339                                                 namesOfGroupCombos.push_back(groups);
340                                         }
341                                 }
342                                 
343                                 lines.clear();
344                                 
345                                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
346                                         if(processors != 1){
347                                                 int numPairs = namesOfGroupCombos.size();
348                                                 int numPairsPerProcessor = numPairs / processors;
349                                         
350                                                 for (int i = 0; i < processors; i++) {
351                                                         int startPos = i * numPairsPerProcessor;
352                                                         if(i == processors - 1){
353                                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
354                                                         }
355                                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
356                                                 }
357                                         }
358                                 #endif
359
360                                 
361                                 //get scores for random trees
362                                 for (int j = 0; j < iters; j++) {
363                                 
364                                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
365                                                 if(processors == 1){
366                                                         driver(T[i],  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
367                                                 }else{
368                                                         createProcesses(T[i],  namesOfGroupCombos, rScores);
369                                                 }
370                                         #else
371                                                 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
372                                         #endif
373                                         
374                                         if (m->control_pressed) { delete tmap; delete weighted;
375                                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } return 0; }
376                                         
377                                         //report progress
378 //                                      m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();          
379                                 }
380                                 lines.clear();
381                         
382                                 //find the signifigance of the score for summary file
383                                 for (int f = 0; f < numComp; f++) {
384                                         //sort random scores
385                                         sort(rScores[f].begin(), rScores[f].end());
386                                         
387                                         //the index of the score higher than yours is returned 
388                                         //so if you have 1000 random trees the index returned is 100 
389                                         //then there are 900 trees with a score greater then you. 
390                                         //giving you a signifigance of 0.900
391                                         int index = findIndex(userData[f], f);    if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
392                                         
393                                         //the signifigance is the number of trees with the users score or higher 
394                                         WScoreSig.push_back((iters-index)/(float)iters);
395                                 }
396                                 
397                                 //out << "Tree# " << i << endl;
398                                 calculateFreqsCumuls();
399                                 printWeightedFile();
400                                 
401                                 delete output;
402                         
403                         }
404                         
405                         //clear data
406                         rScores.clear();
407                         uScores.clear();
408                         validScores.clear();
409                 }
410                 
411                 
412                 if (m->control_pressed) { delete tmap; delete weighted;
413                         for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0;  }
414                 
415                 printWSummaryFile();
416                 
417                 if (phylip) {   createPhylipFile();             }
418
419                 //clear out users groups
420                 m->clearGroups();
421                 delete tmap; delete weighted;
422                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
423                 
424                 
425                 if (m->control_pressed) { 
426                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  }
427                         return 0; 
428                 }
429                 
430                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
431                 
432                 //set phylip file as new current phylipfile
433                 string current = "";
434                 itTypes = outputTypes.find("phylip");
435                 if (itTypes != outputTypes.end()) {
436                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
437                 }
438                 
439                 //set column file as new current columnfile
440                 itTypes = outputTypes.find("column");
441                 if (itTypes != outputTypes.end()) {
442                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
443                 }
444                 
445                 m->mothurOutEndLine();
446                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
447                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
448                 m->mothurOutEndLine();
449                 
450                 return 0;
451                 
452         }
453         catch(exception& e) {
454                 m->errorOut(e, "UnifracWeightedCommand", "execute");
455                 exit(1);
456         }
457 }
458 /**************************************************************************************************/
459
460 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
461         try {
462 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
463                 int process = 1;
464                 vector<int> processIDS;
465                 
466                 EstOutput results;
467                 
468                 //loop through and create all the processes you want
469                 while (process != processors) {
470                         int pid = fork();
471                         
472                         if (pid > 0) {
473                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
474                                 process++;
475                         }else if (pid == 0){
476                                 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
477                         
478                                 //pass numSeqs to parent
479                                 ofstream out;
480                                 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
481                                 m->openOutputFile(tempFile, out);
482                                 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t';  } out << endl;
483                                 out.close();
484                                 
485                                 exit(0);
486                         }else { 
487                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
488                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
489                                 exit(0);
490                         }
491                 }
492                 
493                 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
494                 
495                 //force parent to wait until all the processes are done
496                 for (int i=0;i<(processors-1);i++) { 
497                         int temp = processIDS[i];
498                         wait(&temp);
499                 }
500                 
501                 //get data created by processes
502                 for (int i=0;i<(processors-1);i++) { 
503         
504                         ifstream in;
505                         string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
506                         m->openInputFile(s, in);
507                         
508                         double tempScore;
509                         for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
510                         in.close();
511                         m->mothurRemove(s);
512                 }
513                 
514                 return 0;
515 #endif          
516         }
517         catch(exception& e) {
518                 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
519                 exit(1);
520         }
521 }
522
523 /**************************************************************************************************/
524 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) { 
525  try {
526                 Tree* randT = new Tree(tmap);
527
528                 for (int h = start; h < (start+num); h++) {
529         
530                         if (m->control_pressed) { return 0; }
531                 
532                         //initialize weighted score
533                         string groupA = namesOfGroupCombos[h][0]; 
534                         string groupB = namesOfGroupCombos[h][1];
535                         
536                         //copy T[i]'s info.
537                         randT->getCopy(t);
538                          
539                         //create a random tree with same topology as T[i], but different labels
540                         randT->assembleRandomUnifracTree(groupA, groupB);
541                         
542                         if (m->control_pressed) { delete randT;  return 0;  }
543
544                         //get wscore of random tree
545                         EstOutput randomData = weighted->getValues(randT, groupA, groupB);
546                 
547                         if (m->control_pressed) { delete randT;  return 0;  }
548                                                                                 
549                         //save scores
550                         scores[h].push_back(randomData[0]);
551                 }
552         
553                 delete randT;
554         
555                 return 0;
556
557         }
558         catch(exception& e) {
559                 m->errorOut(e, "UnifracWeightedCommand", "driver");
560                 exit(1);
561         }
562 }
563 /***********************************************************/
564 void UnifracWeightedCommand::printWeightedFile() {
565         try {
566                 vector<double> data;
567                 vector<string> tags;
568                 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
569                 
570                 for(int a = 0; a < numComp; a++) {
571                         output->initFile(groupComb[a], tags);
572                         //print each line
573                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
574                                 data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
575                                 output->output(data);
576                                 data.clear();
577                         } 
578                         output->resetFile();
579                 }
580         }
581         catch(exception& e) {
582                 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
583                 exit(1);
584         }
585 }
586
587
588 /***********************************************************/
589 void UnifracWeightedCommand::printWSummaryFile() {
590         try {
591                 //column headers
592                 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
593                 m->mothurOut("Tree#\tGroups\tWScore\t");
594                 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
595                 outSum << endl; m->mothurOutEndLine();
596                 
597                 //format output
598                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
599                 
600                 //print each line
601                 int count = 0;
602                 for (int i = 0; i < T.size(); i++) { 
603                         for (int j = 0; j < numComp; j++) {
604                                 if (random) {
605                                         if (WScoreSig[count] > (1/(float)iters)) {
606                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
607                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
608                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" +  toString(WScoreSig[count]) + "\n");   
609                                         }else{
610                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
611                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
612                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" +  toString((1/float(iters))) + "\n");  
613                                         }
614                                 }else{
615                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl; 
616                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count]  << endl; 
617                                         m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n"); 
618                                 }
619                                 count++;
620                         }
621                 }
622                 outSum.close();
623         }
624         catch(exception& e) {
625                 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
626                 exit(1);
627         }
628 }
629 /***********************************************************/
630 void UnifracWeightedCommand::createPhylipFile() {
631         try {
632                 int count = 0;
633                 //for each tree
634                 for (int i = 0; i < T.size(); i++) { 
635                 
636                         string phylipFileName;
637                         if ((outputForm == "lt") || (outputForm == "square")) {
638                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.phylip.dist";
639                                 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
640                         }else { //column
641                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.column.dist";
642                                 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
643                         }
644                         
645                         ofstream out;
646                         m->openOutputFile(phylipFileName, out);
647                         
648                         if ((outputForm == "lt") || (outputForm == "square")) {
649                                 //output numSeqs
650                                 out << m->getNumGroups() << endl;
651                         }
652
653                         //make matrix with scores in it
654                         vector< vector<float> > dists;  dists.resize(m->getNumGroups());
655                         for (int i = 0; i < m->getNumGroups(); i++) {
656                                 dists[i].resize(m->getNumGroups(), 0.0);
657                         }
658                         
659                         //flip it so you can print it
660                         for (int r=0; r<m->getNumGroups(); r++) { 
661                                 for (int l = 0; l < r; l++) {
662                                         dists[r][l] = utreeScores[count];
663                                         dists[l][r] = utreeScores[count];
664                                         count++;
665                                 }
666                         }
667
668                         //output to file
669                         for (int r=0; r<m->getNumGroups(); r++) { 
670                                 //output name
671                                 string name = (m->getGroups())[r];
672                                 if (name.length() < 10) { //pad with spaces to make compatible
673                                         while (name.length() < 10) {  name += " ";  }
674                                 }
675                                 
676                                 if (outputForm == "lt") {
677                                         out << name << '\t';
678                                         
679                                         //output distances
680                                         for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
681                                         out << endl;
682                                 }else if (outputForm == "square") {
683                                         out << name << '\t';
684                                         
685                                         //output distances
686                                         for (int l = 0; l < m->getNumGroups(); l++) {   out  << dists[r][l] << '\t';  }
687                                         out << endl;
688                                 }else{
689                                         //output distances
690                                         for (int l = 0; l < r; l++) {   
691                                                 string otherName = (m->getGroups())[l];
692                                                 if (otherName.length() < 10) { //pad with spaces to make compatible
693                                                         while (otherName.length() < 10) {  otherName += " ";  }
694                                                 }
695                                                 
696                                                 out  << name << '\t' << otherName << dists[r][l] << endl;  
697                                         }
698                                 }
699                         }
700                         out.close();
701                 }
702         }
703         catch(exception& e) {
704                 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
705                 exit(1);
706         }
707 }
708 /***********************************************************/
709 int UnifracWeightedCommand::findIndex(float score, int index) {
710         try{
711                 for (int i = 0; i < rScores[index].size(); i++) {
712                         if (rScores[index][i] >= score) {       return i;       }
713                 }
714                 return rScores[index].size();
715         }
716         catch(exception& e) {
717                 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
718                 exit(1);
719         }
720 }
721
722 /***********************************************************/
723
724 void UnifracWeightedCommand::calculateFreqsCumuls() {
725         try {
726                 //clear out old tree values
727                 rScoreFreq.clear();
728                 rScoreFreq.resize(numComp);
729                 rCumul.clear();
730                 rCumul.resize(numComp);
731                 validScores.clear();
732         
733                 //calculate frequency
734                 for (int f = 0; f < numComp; f++) {
735                         for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
736                                 validScores[rScores[f][i]] = rScores[f][i];
737                                 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
738                                 if (it != rScoreFreq[f].end()) {
739                                         rScoreFreq[f][rScores[f][i]]++;
740                                 }else{
741                                         rScoreFreq[f][rScores[f][i]] = 1;
742                                 }
743                         }
744                 }
745                 
746                 //calculate rcumul
747                 for(int a = 0; a < numComp; a++) {
748                         float rcumul = 1.0000;
749                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
750                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
751                                 //make rscoreFreq map and rCumul
752                                 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
753                                 rCumul[a][it->first] = rcumul;
754                                 //get percentage of random trees with that info
755                                 if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
756                                 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
757                         }
758                 }
759
760         }
761         catch(exception& e) {
762                 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
763                 exit(1);
764         }
765 }
766 /*****************************************************************/
767 int UnifracWeightedCommand::readNamesFile() {
768         try {
769                 m->names.clear();
770                 numUniquesInName = 0;
771                 
772                 ifstream in;
773                 m->openInputFile(namefile, in);
774                 
775                 string first, second;
776                 map<string, string>::iterator itNames;
777                 
778                 while(!in.eof()) {
779                         in >> first >> second; m->gobble(in);
780                         
781                         numUniquesInName++;
782                         
783                         itNames = m->names.find(first);
784                         if (itNames == m->names.end()) {  
785                                 m->names[first] = second; 
786                                 
787                                 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
788                                 vector<string> dupNames;
789                                 m->splitAtComma(second, dupNames);
790                                 
791                                 for (int i = 0; i < dupNames.size(); i++) {     
792                                         nameMap[dupNames[i]] = dupNames[i]; 
793                                         if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); } 
794                                 }
795                         }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }                  
796                 }
797                 in.close();
798                 
799                 return 0;
800         }
801         catch(exception& e) {
802                 m->errorOut(e, "UnifracWeightedCommand", "readNamesFile");
803                 exit(1);
804         }
805 }
806 /***********************************************************/
807
808
809
810
811