]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
0570945b71d9c8962c46bec2a58525a1e37a5e6f
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> UnifracWeightedCommand::setParameters(){ 
14         try {
15                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
16                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
17                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
18                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
19                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
20                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21                 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
22                 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
23                 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string UnifracWeightedCommand::getHelpString(){ 
38         try {
39                 string helpString = "";
40                 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root and random.  tree parameter is required unless you have valid current tree file.\n";
41                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
42                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
43                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
44                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
45                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
46                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
47                 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
48                 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
49                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
50                 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
51                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
52                 return helpString;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 UnifracWeightedCommand::UnifracWeightedCommand(){       
61         try {
62                 abort = true; calledHelp = true; 
63                 setParameters();
64                 vector<string> tempOutNames;
65                 outputTypes["weighted"] = tempOutNames;
66                 outputTypes["wsummary"] = tempOutNames;
67                 outputTypes["phylip"] = tempOutNames;
68                 outputTypes["column"] = tempOutNames;
69         }
70         catch(exception& e) {
71                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
72                 exit(1);
73         }
74 }
75
76 /***********************************************************/
77 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
78         try {
79                 abort = false; calledHelp = false;   
80                         
81                 //allow user to run help
82                 if(option == "help") { help(); abort = true; calledHelp = true; }
83                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
84                 
85                 else {
86                         vector<string> myArray = setParameters();
87                         
88                         OptionParser parser(option);
89                         map<string,string> parameters=parser.getParameters();
90                         map<string,string>::iterator it;
91                         
92                         ValidParameters validParameter;
93                 
94                         //check to make sure all parameters are valid for command
95                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
96                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
97                         }
98                         
99                         //initialize outputTypes
100                         vector<string> tempOutNames;
101                         outputTypes["weighted"] = tempOutNames;
102                         outputTypes["wsummary"] = tempOutNames;
103                         outputTypes["phylip"] = tempOutNames;
104                         outputTypes["column"] = tempOutNames;
105                         
106                         //if the user changes the input directory command factory will send this info to us in the output parameter 
107                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
108                         if (inputDir == "not found"){   inputDir = "";          }
109                         else {
110                                 string path;
111                                 it = parameters.find("tree");
112                                 //user has given a template file
113                                 if(it != parameters.end()){ 
114                                         path = m->hasPath(it->second);
115                                         //if the user has not given a path then, add inputdir. else leave path alone.
116                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
117                                 }
118                                 
119                                 it = parameters.find("group");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
125                                 }
126                                 
127                                 it = parameters.find("name");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
133                                 }
134                         }
135                         
136                         m->runParse = true;
137                         m->Groups.clear();
138                         m->namesOfGroups.clear();
139                         m->Treenames.clear();
140                         m->names.clear();
141                         
142                         //check for required parameters
143                         treefile = validParameter.validFile(parameters, "tree", true);
144                         if (treefile == "not open") { abort = true; }
145                         else if (treefile == "not found") {                             //if there is a current design file, use it
146                                 treefile = m->getTreeFile(); 
147                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
148                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
149                         }       
150                         
151                         //check for required parameters
152                         groupfile = validParameter.validFile(parameters, "group", true);
153                         if (groupfile == "not open") { abort = true; }
154                         else if (groupfile == "not found") { groupfile = ""; }
155                         
156                         namefile = validParameter.validFile(parameters, "name", true);
157                         if (namefile == "not open") { abort = true; }
158                         else if (namefile == "not found") { namefile = ""; }
159                         
160                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
161                         
162                                                                                                                                         
163                         //check for optional parameter and set defaults
164                         // ...at some point should added some additional type checking...
165                         groups = validParameter.validFile(parameters, "groups", false);                 
166                         if (groups == "not found") { groups = ""; }
167                         else { 
168                                 m->splitAtDash(groups, Groups);
169                                 m->Groups = Groups;
170                         }
171                                 
172                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
173                         convert(itersString, iters); 
174                         
175                         string temp = validParameter.validFile(parameters, "distance", false);                  
176                         if (temp == "not found") { phylip = false; outputForm = ""; }
177                         else{
178                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
179                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
180                         }
181                         
182                         temp = validParameter.validFile(parameters, "random", false);                           if (temp == "not found") { temp = "F"; }
183                         random = m->isTrue(temp);
184                         
185                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
186                         includeRoot = m->isTrue(temp);
187                         
188                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
189                         m->setProcessors(temp);
190                         convert(temp, processors);
191                         
192                         if (!random) {  iters = 0;  } //turn off random calcs
193                 }
194                 
195                 
196         }
197         catch(exception& e) {
198                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
199                 exit(1);
200         }
201 }
202 /***********************************************************/
203 int UnifracWeightedCommand::execute() {
204         try {
205         
206                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
207                 
208                 m->setTreeFile(treefile);
209                 
210                 if (groupfile != "") {
211                         //read in group map info.
212                         tmap = new TreeMap(groupfile);
213                         tmap->readMap();
214                 }else{ //fake out by putting everyone in one group
215                         Tree* tree = new Tree(treefile); delete tree;  //extracts names from tree to make faked out groupmap
216                         tmap = new TreeMap();
217                         
218                         for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
219                 }
220                 
221                 if (namefile != "") { readNamesFile(); }
222                 
223                 read = new ReadNewickTree(treefile);
224                 int readOk = read->read(tmap); 
225                 
226                 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
227                 
228                 read->AssembleTrees();
229                 T = read->getTrees();
230                 delete read;
231                 
232                 //make sure all files match
233                 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
234                 int numNamesInTree;
235                 if (namefile != "")  {  
236                         if (numUniquesInName == m->Treenames.size()) {  numNamesInTree = nameMap.size();  }
237                         else {   numNamesInTree = m->Treenames.size();  }
238                 }else {  numNamesInTree = m->Treenames.size();  }
239                 
240                 
241                 //output any names that are in group file but not in tree
242                 if (numNamesInTree < tmap->getNumSeqs()) {
243                         for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
244                                 //is that name in the tree?
245                                 int count = 0;
246                                 for (int j = 0; j < m->Treenames.size(); j++) {
247                                         if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
248                                         count++;
249                                 }
250                                 
251                                 if (m->control_pressed) { 
252                                         delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
253                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
254                                         m->Groups.clear();
255                                         return 0;
256                                 }
257                                 
258                                 //then you did not find it so report it 
259                                 if (count == m->Treenames.size()) { 
260                                         //if it is in your namefile then don't remove
261                                         map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
262                                         
263                                         if (it == nameMap.end()) {
264                                                 m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
265                                                 tmap->removeSeq(tmap->namesOfSeqs[i]);
266                                                 i--; //need this because removeSeq removes name from namesOfSeqs
267                                         }
268                                 }
269                         }
270                 }
271                 
272                 sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
273                 m->openOutputFile(sumFile, outSum);
274                 outputNames.push_back(sumFile);  outputTypes["wsummary"].push_back(sumFile);
275                         
276                 util = new SharedUtil();
277                 string s; //to make work with setgroups
278                 util->setGroups(m->Groups, tmap->namesOfGroups, s, numGroups, "weighted");      //sets the groups the user wants to analyze
279                 util->getCombos(groupComb, m->Groups, numComp);
280                 delete util;
281                 
282                 weighted = new Weighted(tmap, includeRoot);
283                         
284                 int start = time(NULL);
285                 
286                 //get weighted for users tree
287                 userData.resize(numComp,0);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
288                 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
289                 
290                 if (numComp < processors) { processors = numComp; }
291                                 
292                 //get weighted scores for users trees
293                 for (int i = 0; i < T.size(); i++) {
294                         
295                         if (m->control_pressed) { delete tmap; delete weighted;
296                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      remove(outputNames[i].c_str());  } return 0; }
297
298                         counter = 0;
299                         rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
300                         uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
301                         
302                         if (random) {  
303                                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted", itersString);  
304                                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
305                                 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
306                         } 
307
308                         userData = weighted->getValues(T[i], processors, outputDir);  //userData[0] = weightedscore
309                         
310                         if (m->control_pressed) { delete tmap; delete weighted;
311                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());  } return 0; }
312                         
313                         //save users score
314                         for (int s=0; s<numComp; s++) {
315                                 //add users score to vector of user scores
316                                 uScores[s].push_back(userData[s]);
317                                 
318                                 //save users tree score for summary file
319                                 utreeScores.push_back(userData[s]);
320                         }
321                         
322                         if (random) { 
323                         
324                                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
325                                 vector< vector<string> > namesOfGroupCombos;
326                                 for (int a=0; a<numGroups; a++) { 
327                                         for (int l = 0; l < a; l++) {   
328                                                 vector<string> groups; groups.push_back(m->Groups[a]); groups.push_back(m->Groups[l]);
329                                                 namesOfGroupCombos.push_back(groups);
330                                         }
331                                 }
332                                 
333                                 lines.clear();
334                                 
335                                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
336                                         if(processors != 1){
337                                                 int numPairs = namesOfGroupCombos.size();
338                                                 int numPairsPerProcessor = numPairs / processors;
339                                         
340                                                 for (int i = 0; i < processors; i++) {
341                                                         int startPos = i * numPairsPerProcessor;
342                                                         if(i == processors - 1){
343                                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
344                                                         }
345                                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
346                                                 }
347                                         }
348                                 #endif
349
350                                 
351                                 //get scores for random trees
352                                 for (int j = 0; j < iters; j++) {
353                                 
354                                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
355                                                 if(processors == 1){
356                                                         driver(T[i],  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
357                                                 }else{
358                                                         createProcesses(T[i],  namesOfGroupCombos, rScores);
359                                                 }
360                                         #else
361                                                 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
362                                         #endif
363                                         
364                                         if (m->control_pressed) { delete tmap; delete weighted;
365                                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());  } return 0; }
366                                         
367                                         //report progress
368 //                                      m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();          
369                                 }
370                                 lines.clear();
371                         
372                                 //find the signifigance of the score for summary file
373                                 for (int f = 0; f < numComp; f++) {
374                                         //sort random scores
375                                         sort(rScores[f].begin(), rScores[f].end());
376                                         
377                                         //the index of the score higher than yours is returned 
378                                         //so if you have 1000 random trees the index returned is 100 
379                                         //then there are 900 trees with a score greater then you. 
380                                         //giving you a signifigance of 0.900
381                                         int index = findIndex(userData[f], f);    if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
382                                         
383                                         //the signifigance is the number of trees with the users score or higher 
384                                         WScoreSig.push_back((iters-index)/(float)iters);
385                                 }
386                                 
387                                 //out << "Tree# " << i << endl;
388                                 calculateFreqsCumuls();
389                                 printWeightedFile();
390                                 
391                                 delete output;
392                         
393                         }
394                         
395                         //clear data
396                         rScores.clear();
397                         uScores.clear();
398                         validScores.clear();
399                 }
400                 
401                 
402                 if (m->control_pressed) { delete tmap; delete weighted;
403                         for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      remove(outputNames[i].c_str());  } return 0;  }
404                 
405                 printWSummaryFile();
406                 
407                 if (phylip) {   createPhylipFile();             }
408
409                 //clear out users groups
410                 m->Groups.clear();
411                 delete tmap; delete weighted;
412                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
413                 
414                 
415                 if (m->control_pressed) { 
416                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  }
417                         return 0; 
418                 }
419                 
420                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
421                 
422                 //set phylip file as new current phylipfile
423                 string current = "";
424                 itTypes = outputTypes.find("phylip");
425                 if (itTypes != outputTypes.end()) {
426                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
427                 }
428                 
429                 //set column file as new current columnfile
430                 itTypes = outputTypes.find("column");
431                 if (itTypes != outputTypes.end()) {
432                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
433                 }
434                 
435                 m->mothurOutEndLine();
436                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
437                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
438                 m->mothurOutEndLine();
439                 
440                 return 0;
441                 
442         }
443         catch(exception& e) {
444                 m->errorOut(e, "UnifracWeightedCommand", "execute");
445                 exit(1);
446         }
447 }
448 /**************************************************************************************************/
449
450 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
451         try {
452 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
453                 int process = 1;
454                 vector<int> processIDS;
455                 
456                 EstOutput results;
457                 
458                 //loop through and create all the processes you want
459                 while (process != processors) {
460                         int pid = fork();
461                         
462                         if (pid > 0) {
463                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
464                                 process++;
465                         }else if (pid == 0){
466                                 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
467                         
468                                 //pass numSeqs to parent
469                                 ofstream out;
470                                 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
471                                 m->openOutputFile(tempFile, out);
472                                 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t';  } out << endl;
473                                 out.close();
474                                 
475                                 exit(0);
476                         }else { 
477                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
478                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
479                                 exit(0);
480                         }
481                 }
482                 
483                 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
484                 
485                 //force parent to wait until all the processes are done
486                 for (int i=0;i<(processors-1);i++) { 
487                         int temp = processIDS[i];
488                         wait(&temp);
489                 }
490                 
491                 //get data created by processes
492                 for (int i=0;i<(processors-1);i++) { 
493         
494                         ifstream in;
495                         string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
496                         m->openInputFile(s, in);
497                         
498                         double tempScore;
499                         for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
500                         in.close();
501                         remove(s.c_str());
502                 }
503                 
504                 return 0;
505 #endif          
506         }
507         catch(exception& e) {
508                 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
509                 exit(1);
510         }
511 }
512
513 /**************************************************************************************************/
514 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) { 
515  try {
516                 Tree* randT = new Tree(tmap);
517
518                 for (int h = start; h < (start+num); h++) {
519         
520                         if (m->control_pressed) { return 0; }
521                 
522                         //initialize weighted score
523                         string groupA = namesOfGroupCombos[h][0]; 
524                         string groupB = namesOfGroupCombos[h][1];
525                         
526                         //copy T[i]'s info.
527                         randT->getCopy(t);
528                          
529                         //create a random tree with same topology as T[i], but different labels
530                         randT->assembleRandomUnifracTree(groupA, groupB);
531                         
532                         if (m->control_pressed) { delete randT;  return 0;  }
533
534                         //get wscore of random tree
535                         EstOutput randomData = weighted->getValues(randT, groupA, groupB);
536                 
537                         if (m->control_pressed) { delete randT;  return 0;  }
538                                                                                 
539                         //save scores
540                         scores[h].push_back(randomData[0]);
541                 }
542         
543                 delete randT;
544         
545                 return 0;
546
547         }
548         catch(exception& e) {
549                 m->errorOut(e, "UnifracWeightedCommand", "driver");
550                 exit(1);
551         }
552 }
553 /***********************************************************/
554 void UnifracWeightedCommand::printWeightedFile() {
555         try {
556                 vector<double> data;
557                 vector<string> tags;
558                 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
559                 
560                 for(int a = 0; a < numComp; a++) {
561                         output->initFile(groupComb[a], tags);
562                         //print each line
563                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
564                                 data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
565                                 output->output(data);
566                                 data.clear();
567                         } 
568                         output->resetFile();
569                 }
570         }
571         catch(exception& e) {
572                 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
573                 exit(1);
574         }
575 }
576
577
578 /***********************************************************/
579 void UnifracWeightedCommand::printWSummaryFile() {
580         try {
581                 //column headers
582                 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t' << "WSig" <<  endl;
583                 m->mothurOut("Tree#\tGroups\tWScore\tWSig"); m->mothurOutEndLine(); 
584                 
585                 //format output
586                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
587                 
588                 //print each line
589                 int count = 0;
590                 for (int i = 0; i < T.size(); i++) { 
591                         for (int j = 0; j < numComp; j++) {
592                                 if (random) {
593                                         if (WScoreSig[count] > (1/(float)iters)) {
594                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
595                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
596                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" +  toString(WScoreSig[count]) + "\n");   
597                                         }else{
598                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
599                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
600                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" +  toString((1/float(iters))) + "\n");  
601                                         }
602                                 }else{
603                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << "0.00" << endl; 
604                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << "0.00" << endl; 
605                                         m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t0.00\n"); 
606                                 }
607                                 count++;
608                         }
609                 }
610                 outSum.close();
611         }
612         catch(exception& e) {
613                 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
614                 exit(1);
615         }
616 }
617 /***********************************************************/
618 void UnifracWeightedCommand::createPhylipFile() {
619         try {
620                 int count = 0;
621                 //for each tree
622                 for (int i = 0; i < T.size(); i++) { 
623                 
624                         string phylipFileName;
625                         if ((outputForm == "lt") || (outputForm == "square")) {
626                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.phylip.dist";
627                                 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
628                         }else { //column
629                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.column.dist";
630                                 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
631                         }
632                         
633                         ofstream out;
634                         m->openOutputFile(phylipFileName, out);
635                         
636                         if ((outputForm == "lt") || (outputForm == "square")) {
637                                 //output numSeqs
638                                 out << m->Groups.size() << endl;
639                         }
640
641                         //make matrix with scores in it
642                         vector< vector<float> > dists;  dists.resize(m->Groups.size());
643                         for (int i = 0; i < m->Groups.size(); i++) {
644                                 dists[i].resize(m->Groups.size(), 0.0);
645                         }
646                         
647                         //flip it so you can print it
648                         for (int r=0; r<m->Groups.size(); r++) { 
649                                 for (int l = 0; l < r; l++) {
650                                         dists[r][l] = utreeScores[count];
651                                         dists[l][r] = utreeScores[count];
652                                         count++;
653                                 }
654                         }
655
656                         //output to file
657                         for (int r=0; r<m->Groups.size(); r++) { 
658                                 //output name
659                                 string name = m->Groups[r];
660                                 if (name.length() < 10) { //pad with spaces to make compatible
661                                         while (name.length() < 10) {  name += " ";  }
662                                 }
663                                 
664                                 if (outputForm == "lt") {
665                                         out << name << '\t';
666                                         
667                                         //output distances
668                                         for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
669                                         out << endl;
670                                 }else if (outputForm == "square") {
671                                         out << name << '\t';
672                                         
673                                         //output distances
674                                         for (int l = 0; l < m->Groups.size(); l++) {    out  << dists[r][l] << '\t';  }
675                                         out << endl;
676                                 }else{
677                                         //output distances
678                                         for (int l = 0; l < r; l++) {   
679                                                 string otherName = m->Groups[l];
680                                                 if (otherName.length() < 10) { //pad with spaces to make compatible
681                                                         while (otherName.length() < 10) {  otherName += " ";  }
682                                                 }
683                                                 
684                                                 out  << name << '\t' << otherName << dists[r][l] << endl;  
685                                         }
686                                 }
687                         }
688                         out.close();
689                 }
690         }
691         catch(exception& e) {
692                 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
693                 exit(1);
694         }
695 }
696 /***********************************************************/
697 int UnifracWeightedCommand::findIndex(float score, int index) {
698         try{
699                 for (int i = 0; i < rScores[index].size(); i++) {
700                         if (rScores[index][i] >= score) {       return i;       }
701                 }
702                 return rScores[index].size();
703         }
704         catch(exception& e) {
705                 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
706                 exit(1);
707         }
708 }
709
710 /***********************************************************/
711
712 void UnifracWeightedCommand::calculateFreqsCumuls() {
713         try {
714                 //clear out old tree values
715                 rScoreFreq.clear();
716                 rScoreFreq.resize(numComp);
717                 rCumul.clear();
718                 rCumul.resize(numComp);
719                 validScores.clear();
720         
721                 //calculate frequency
722                 for (int f = 0; f < numComp; f++) {
723                         for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
724                                 validScores[rScores[f][i]] = rScores[f][i];
725                                 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
726                                 if (it != rScoreFreq[f].end()) {
727                                         rScoreFreq[f][rScores[f][i]]++;
728                                 }else{
729                                         rScoreFreq[f][rScores[f][i]] = 1;
730                                 }
731                         }
732                 }
733                 
734                 //calculate rcumul
735                 for(int a = 0; a < numComp; a++) {
736                         float rcumul = 1.0000;
737                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
738                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
739                                 //make rscoreFreq map and rCumul
740                                 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
741                                 rCumul[a][it->first] = rcumul;
742                                 //get percentage of random trees with that info
743                                 if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
744                                 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
745                         }
746                 }
747
748         }
749         catch(exception& e) {
750                 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
751                 exit(1);
752         }
753 }
754 /*****************************************************************/
755 int UnifracWeightedCommand::readNamesFile() {
756         try {
757                 m->names.clear();
758                 numUniquesInName = 0;
759                 
760                 ifstream in;
761                 m->openInputFile(namefile, in);
762                 
763                 string first, second;
764                 map<string, string>::iterator itNames;
765                 
766                 while(!in.eof()) {
767                         in >> first >> second; m->gobble(in);
768                         
769                         numUniquesInName++;
770                         
771                         itNames = m->names.find(first);
772                         if (itNames == m->names.end()) {  
773                                 m->names[first] = second; 
774                                 
775                                 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
776                                 vector<string> dupNames;
777                                 m->splitAtComma(second, dupNames);
778                                 
779                                 for (int i = 0; i < dupNames.size(); i++) {     
780                                         nameMap[dupNames[i]] = dupNames[i]; 
781                                         if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); } 
782                                 }
783                         }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }                  
784                 }
785                 in.close();
786                 
787                 return 0;
788         }
789         catch(exception& e) {
790                 m->errorOut(e, "UnifracWeightedCommand", "readNamesFile");
791                 exit(1);
792         }
793 }
794 /***********************************************************/
795
796
797
798
799