]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
rewrote metastats command in c++, added mothurRemove function to handle ~ error....
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> UnifracWeightedCommand::setParameters(){ 
14         try {
15                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
16                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
17                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
18                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
19                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
20                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21                 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
22                 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
23                 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string UnifracWeightedCommand::getHelpString(){ 
38         try {
39                 string helpString = "";
40                 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root and random.  tree parameter is required unless you have valid current tree file.\n";
41                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
42                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
43                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
44                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
45                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
46                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
47                 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
48                 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
49                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
50                 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
51                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
52                 return helpString;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 UnifracWeightedCommand::UnifracWeightedCommand(){       
61         try {
62                 abort = true; calledHelp = true; 
63                 setParameters();
64                 vector<string> tempOutNames;
65                 outputTypes["weighted"] = tempOutNames;
66                 outputTypes["wsummary"] = tempOutNames;
67                 outputTypes["phylip"] = tempOutNames;
68                 outputTypes["column"] = tempOutNames;
69         }
70         catch(exception& e) {
71                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
72                 exit(1);
73         }
74 }
75
76 /***********************************************************/
77 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
78         try {
79                 abort = false; calledHelp = false;   
80                         
81                 //allow user to run help
82                 if(option == "help") { help(); abort = true; calledHelp = true; }
83                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
84                 
85                 else {
86                         vector<string> myArray = setParameters();
87                         
88                         OptionParser parser(option);
89                         map<string,string> parameters=parser.getParameters();
90                         map<string,string>::iterator it;
91                         
92                         ValidParameters validParameter;
93                 
94                         //check to make sure all parameters are valid for command
95                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
96                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
97                         }
98                         
99                         //initialize outputTypes
100                         vector<string> tempOutNames;
101                         outputTypes["weighted"] = tempOutNames;
102                         outputTypes["wsummary"] = tempOutNames;
103                         outputTypes["phylip"] = tempOutNames;
104                         outputTypes["column"] = tempOutNames;
105                         
106                         //if the user changes the input directory command factory will send this info to us in the output parameter 
107                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
108                         if (inputDir == "not found"){   inputDir = "";          }
109                         else {
110                                 string path;
111                                 it = parameters.find("tree");
112                                 //user has given a template file
113                                 if(it != parameters.end()){ 
114                                         path = m->hasPath(it->second);
115                                         //if the user has not given a path then, add inputdir. else leave path alone.
116                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
117                                 }
118                                 
119                                 it = parameters.find("group");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
125                                 }
126                                 
127                                 it = parameters.find("name");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
133                                 }
134                         }
135                         
136                         m->runParse = true;
137                         m->Groups.clear();
138                         m->namesOfGroups.clear();
139                         m->Treenames.clear();
140                         m->names.clear();
141                         
142                         //check for required parameters
143                         treefile = validParameter.validFile(parameters, "tree", true);
144                         if (treefile == "not open") { abort = true; }
145                         else if (treefile == "not found") {                             //if there is a current design file, use it
146                                 treefile = m->getTreeFile(); 
147                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
148                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
149                         }else { m->setTreeFile(treefile); }     
150                         
151                         //check for required parameters
152                         groupfile = validParameter.validFile(parameters, "group", true);
153                         if (groupfile == "not open") { abort = true; }
154                         else if (groupfile == "not found") { groupfile = ""; }
155                         else { m->setGroupFile(groupfile); }
156                         
157                         namefile = validParameter.validFile(parameters, "name", true);
158                         if (namefile == "not open") { abort = true; }
159                         else if (namefile == "not found") { namefile = ""; }
160                         else { m->setNameFile(namefile); }
161                         
162                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
163                         
164                                                                                                                                         
165                         //check for optional parameter and set defaults
166                         // ...at some point should added some additional type checking...
167                         groups = validParameter.validFile(parameters, "groups", false);                 
168                         if (groups == "not found") { groups = ""; }
169                         else { 
170                                 m->splitAtDash(groups, Groups);
171                                 m->Groups = Groups;
172                         }
173                                 
174                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
175                         convert(itersString, iters); 
176                         
177                         string temp = validParameter.validFile(parameters, "distance", false);                  
178                         if (temp == "not found") { phylip = false; outputForm = ""; }
179                         else{
180                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
181                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
182                         }
183                         
184                         temp = validParameter.validFile(parameters, "random", false);                           if (temp == "not found") { temp = "F"; }
185                         random = m->isTrue(temp);
186                         
187                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
188                         includeRoot = m->isTrue(temp);
189                         
190                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
191                         m->setProcessors(temp);
192                         convert(temp, processors);
193                         
194                         if (!random) {  iters = 0;  } //turn off random calcs
195                 }
196                 
197                 
198         }
199         catch(exception& e) {
200                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
201                 exit(1);
202         }
203 }
204 /***********************************************************/
205 int UnifracWeightedCommand::execute() {
206         try {
207         
208                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
209                 
210                 m->setTreeFile(treefile);
211                 
212                 if (groupfile != "") {
213                         //read in group map info.
214                         tmap = new TreeMap(groupfile);
215                         tmap->readMap();
216                 }else{ //fake out by putting everyone in one group
217                         Tree* tree = new Tree(treefile); delete tree;  //extracts names from tree to make faked out groupmap
218                         tmap = new TreeMap();
219                         
220                         for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
221                 }
222                 
223                 if (namefile != "") { readNamesFile(); }
224                 
225                 read = new ReadNewickTree(treefile);
226                 int readOk = read->read(tmap); 
227                 
228                 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
229                 
230                 read->AssembleTrees();
231                 T = read->getTrees();
232                 delete read;
233                 
234                 //make sure all files match
235                 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
236                 int numNamesInTree;
237                 if (namefile != "")  {  
238                         if (numUniquesInName == m->Treenames.size()) {  numNamesInTree = nameMap.size();  }
239                         else {   numNamesInTree = m->Treenames.size();  }
240                 }else {  numNamesInTree = m->Treenames.size();  }
241                 
242                 
243                 //output any names that are in group file but not in tree
244                 if (numNamesInTree < tmap->getNumSeqs()) {
245                         for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
246                                 //is that name in the tree?
247                                 int count = 0;
248                                 for (int j = 0; j < m->Treenames.size(); j++) {
249                                         if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
250                                         count++;
251                                 }
252                                 
253                                 if (m->control_pressed) { 
254                                         delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
255                                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
256                                         m->Groups.clear();
257                                         return 0;
258                                 }
259                                 
260                                 //then you did not find it so report it 
261                                 if (count == m->Treenames.size()) { 
262                                         //if it is in your namefile then don't remove
263                                         map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
264                                         
265                                         if (it == nameMap.end()) {
266                                                 m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
267                                                 tmap->removeSeq(tmap->namesOfSeqs[i]);
268                                                 i--; //need this because removeSeq removes name from namesOfSeqs
269                                         }
270                                 }
271                         }
272                 }
273                 
274                 sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
275                 m->openOutputFile(sumFile, outSum);
276                 outputNames.push_back(sumFile);  outputTypes["wsummary"].push_back(sumFile);
277                         
278                 util = new SharedUtil();
279                 string s; //to make work with setgroups
280                 util->setGroups(m->Groups, tmap->namesOfGroups, s, numGroups, "weighted");      //sets the groups the user wants to analyze
281                 util->getCombos(groupComb, m->Groups, numComp);
282                 delete util;
283                 
284                 weighted = new Weighted(tmap, includeRoot);
285                         
286                 int start = time(NULL);
287                 
288                 //get weighted for users tree
289                 userData.resize(numComp,0);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
290                 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
291                 
292                 if (numComp < processors) { processors = numComp; }
293                                 
294                 //get weighted scores for users trees
295                 for (int i = 0; i < T.size(); i++) {
296                         
297                         if (m->control_pressed) { delete tmap; delete weighted;
298                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0; }
299
300                         counter = 0;
301                         rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
302                         uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
303                         
304                         if (random) {  
305                                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted", itersString);  
306                                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
307                                 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
308                         } 
309
310                         userData = weighted->getValues(T[i], processors, outputDir);  //userData[0] = weightedscore
311                         
312                         if (m->control_pressed) { delete tmap; delete weighted;
313                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } return 0; }
314                         
315                         //save users score
316                         for (int s=0; s<numComp; s++) {
317                                 //add users score to vector of user scores
318                                 uScores[s].push_back(userData[s]);
319                                 
320                                 //save users tree score for summary file
321                                 utreeScores.push_back(userData[s]);
322                         }
323                         
324                         if (random) { 
325                         
326                                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
327                                 vector< vector<string> > namesOfGroupCombos;
328                                 for (int a=0; a<numGroups; a++) { 
329                                         for (int l = 0; l < a; l++) {   
330                                                 vector<string> groups; groups.push_back(m->Groups[a]); groups.push_back(m->Groups[l]);
331                                                 namesOfGroupCombos.push_back(groups);
332                                         }
333                                 }
334                                 
335                                 lines.clear();
336                                 
337                                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
338                                         if(processors != 1){
339                                                 int numPairs = namesOfGroupCombos.size();
340                                                 int numPairsPerProcessor = numPairs / processors;
341                                         
342                                                 for (int i = 0; i < processors; i++) {
343                                                         int startPos = i * numPairsPerProcessor;
344                                                         if(i == processors - 1){
345                                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
346                                                         }
347                                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
348                                                 }
349                                         }
350                                 #endif
351
352                                 
353                                 //get scores for random trees
354                                 for (int j = 0; j < iters; j++) {
355                                 
356                                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
357                                                 if(processors == 1){
358                                                         driver(T[i],  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
359                                                 }else{
360                                                         createProcesses(T[i],  namesOfGroupCombos, rScores);
361                                                 }
362                                         #else
363                                                 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
364                                         #endif
365                                         
366                                         if (m->control_pressed) { delete tmap; delete weighted;
367                                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } return 0; }
368                                         
369                                         //report progress
370 //                                      m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();          
371                                 }
372                                 lines.clear();
373                         
374                                 //find the signifigance of the score for summary file
375                                 for (int f = 0; f < numComp; f++) {
376                                         //sort random scores
377                                         sort(rScores[f].begin(), rScores[f].end());
378                                         
379                                         //the index of the score higher than yours is returned 
380                                         //so if you have 1000 random trees the index returned is 100 
381                                         //then there are 900 trees with a score greater then you. 
382                                         //giving you a signifigance of 0.900
383                                         int index = findIndex(userData[f], f);    if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
384                                         
385                                         //the signifigance is the number of trees with the users score or higher 
386                                         WScoreSig.push_back((iters-index)/(float)iters);
387                                 }
388                                 
389                                 //out << "Tree# " << i << endl;
390                                 calculateFreqsCumuls();
391                                 printWeightedFile();
392                                 
393                                 delete output;
394                         
395                         }
396                         
397                         //clear data
398                         rScores.clear();
399                         uScores.clear();
400                         validScores.clear();
401                 }
402                 
403                 
404                 if (m->control_pressed) { delete tmap; delete weighted;
405                         for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0;  }
406                 
407                 printWSummaryFile();
408                 
409                 if (phylip) {   createPhylipFile();             }
410
411                 //clear out users groups
412                 m->Groups.clear();
413                 delete tmap; delete weighted;
414                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
415                 
416                 
417                 if (m->control_pressed) { 
418                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  }
419                         return 0; 
420                 }
421                 
422                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
423                 
424                 //set phylip file as new current phylipfile
425                 string current = "";
426                 itTypes = outputTypes.find("phylip");
427                 if (itTypes != outputTypes.end()) {
428                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
429                 }
430                 
431                 //set column file as new current columnfile
432                 itTypes = outputTypes.find("column");
433                 if (itTypes != outputTypes.end()) {
434                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
435                 }
436                 
437                 m->mothurOutEndLine();
438                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
439                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
440                 m->mothurOutEndLine();
441                 
442                 return 0;
443                 
444         }
445         catch(exception& e) {
446                 m->errorOut(e, "UnifracWeightedCommand", "execute");
447                 exit(1);
448         }
449 }
450 /**************************************************************************************************/
451
452 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
453         try {
454 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
455                 int process = 1;
456                 vector<int> processIDS;
457                 
458                 EstOutput results;
459                 
460                 //loop through and create all the processes you want
461                 while (process != processors) {
462                         int pid = fork();
463                         
464                         if (pid > 0) {
465                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
466                                 process++;
467                         }else if (pid == 0){
468                                 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
469                         
470                                 //pass numSeqs to parent
471                                 ofstream out;
472                                 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
473                                 m->openOutputFile(tempFile, out);
474                                 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t';  } out << endl;
475                                 out.close();
476                                 
477                                 exit(0);
478                         }else { 
479                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
480                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
481                                 exit(0);
482                         }
483                 }
484                 
485                 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
486                 
487                 //force parent to wait until all the processes are done
488                 for (int i=0;i<(processors-1);i++) { 
489                         int temp = processIDS[i];
490                         wait(&temp);
491                 }
492                 
493                 //get data created by processes
494                 for (int i=0;i<(processors-1);i++) { 
495         
496                         ifstream in;
497                         string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
498                         m->openInputFile(s, in);
499                         
500                         double tempScore;
501                         for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
502                         in.close();
503                         m->mothurRemove(s);
504                 }
505                 
506                 return 0;
507 #endif          
508         }
509         catch(exception& e) {
510                 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
511                 exit(1);
512         }
513 }
514
515 /**************************************************************************************************/
516 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) { 
517  try {
518                 Tree* randT = new Tree(tmap);
519
520                 for (int h = start; h < (start+num); h++) {
521         
522                         if (m->control_pressed) { return 0; }
523                 
524                         //initialize weighted score
525                         string groupA = namesOfGroupCombos[h][0]; 
526                         string groupB = namesOfGroupCombos[h][1];
527                         
528                         //copy T[i]'s info.
529                         randT->getCopy(t);
530                          
531                         //create a random tree with same topology as T[i], but different labels
532                         randT->assembleRandomUnifracTree(groupA, groupB);
533                         
534                         if (m->control_pressed) { delete randT;  return 0;  }
535
536                         //get wscore of random tree
537                         EstOutput randomData = weighted->getValues(randT, groupA, groupB);
538                 
539                         if (m->control_pressed) { delete randT;  return 0;  }
540                                                                                 
541                         //save scores
542                         scores[h].push_back(randomData[0]);
543                 }
544         
545                 delete randT;
546         
547                 return 0;
548
549         }
550         catch(exception& e) {
551                 m->errorOut(e, "UnifracWeightedCommand", "driver");
552                 exit(1);
553         }
554 }
555 /***********************************************************/
556 void UnifracWeightedCommand::printWeightedFile() {
557         try {
558                 vector<double> data;
559                 vector<string> tags;
560                 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
561                 
562                 for(int a = 0; a < numComp; a++) {
563                         output->initFile(groupComb[a], tags);
564                         //print each line
565                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
566                                 data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
567                                 output->output(data);
568                                 data.clear();
569                         } 
570                         output->resetFile();
571                 }
572         }
573         catch(exception& e) {
574                 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
575                 exit(1);
576         }
577 }
578
579
580 /***********************************************************/
581 void UnifracWeightedCommand::printWSummaryFile() {
582         try {
583                 //column headers
584                 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t' << "WSig" <<  endl;
585                 m->mothurOut("Tree#\tGroups\tWScore\tWSig"); m->mothurOutEndLine(); 
586                 
587                 //format output
588                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
589                 
590                 //print each line
591                 int count = 0;
592                 for (int i = 0; i < T.size(); i++) { 
593                         for (int j = 0; j < numComp; j++) {
594                                 if (random) {
595                                         if (WScoreSig[count] > (1/(float)iters)) {
596                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
597                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
598                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" +  toString(WScoreSig[count]) + "\n");   
599                                         }else{
600                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
601                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
602                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" +  toString((1/float(iters))) + "\n");  
603                                         }
604                                 }else{
605                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << "0.00" << endl; 
606                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << "0.00" << endl; 
607                                         m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t0.00\n"); 
608                                 }
609                                 count++;
610                         }
611                 }
612                 outSum.close();
613         }
614         catch(exception& e) {
615                 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
616                 exit(1);
617         }
618 }
619 /***********************************************************/
620 void UnifracWeightedCommand::createPhylipFile() {
621         try {
622                 int count = 0;
623                 //for each tree
624                 for (int i = 0; i < T.size(); i++) { 
625                 
626                         string phylipFileName;
627                         if ((outputForm == "lt") || (outputForm == "square")) {
628                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.phylip.dist";
629                                 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
630                         }else { //column
631                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.column.dist";
632                                 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
633                         }
634                         
635                         ofstream out;
636                         m->openOutputFile(phylipFileName, out);
637                         
638                         if ((outputForm == "lt") || (outputForm == "square")) {
639                                 //output numSeqs
640                                 out << m->Groups.size() << endl;
641                         }
642
643                         //make matrix with scores in it
644                         vector< vector<float> > dists;  dists.resize(m->Groups.size());
645                         for (int i = 0; i < m->Groups.size(); i++) {
646                                 dists[i].resize(m->Groups.size(), 0.0);
647                         }
648                         
649                         //flip it so you can print it
650                         for (int r=0; r<m->Groups.size(); r++) { 
651                                 for (int l = 0; l < r; l++) {
652                                         dists[r][l] = utreeScores[count];
653                                         dists[l][r] = utreeScores[count];
654                                         count++;
655                                 }
656                         }
657
658                         //output to file
659                         for (int r=0; r<m->Groups.size(); r++) { 
660                                 //output name
661                                 string name = m->Groups[r];
662                                 if (name.length() < 10) { //pad with spaces to make compatible
663                                         while (name.length() < 10) {  name += " ";  }
664                                 }
665                                 
666                                 if (outputForm == "lt") {
667                                         out << name << '\t';
668                                         
669                                         //output distances
670                                         for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
671                                         out << endl;
672                                 }else if (outputForm == "square") {
673                                         out << name << '\t';
674                                         
675                                         //output distances
676                                         for (int l = 0; l < m->Groups.size(); l++) {    out  << dists[r][l] << '\t';  }
677                                         out << endl;
678                                 }else{
679                                         //output distances
680                                         for (int l = 0; l < r; l++) {   
681                                                 string otherName = m->Groups[l];
682                                                 if (otherName.length() < 10) { //pad with spaces to make compatible
683                                                         while (otherName.length() < 10) {  otherName += " ";  }
684                                                 }
685                                                 
686                                                 out  << name << '\t' << otherName << dists[r][l] << endl;  
687                                         }
688                                 }
689                         }
690                         out.close();
691                 }
692         }
693         catch(exception& e) {
694                 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
695                 exit(1);
696         }
697 }
698 /***********************************************************/
699 int UnifracWeightedCommand::findIndex(float score, int index) {
700         try{
701                 for (int i = 0; i < rScores[index].size(); i++) {
702                         if (rScores[index][i] >= score) {       return i;       }
703                 }
704                 return rScores[index].size();
705         }
706         catch(exception& e) {
707                 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
708                 exit(1);
709         }
710 }
711
712 /***********************************************************/
713
714 void UnifracWeightedCommand::calculateFreqsCumuls() {
715         try {
716                 //clear out old tree values
717                 rScoreFreq.clear();
718                 rScoreFreq.resize(numComp);
719                 rCumul.clear();
720                 rCumul.resize(numComp);
721                 validScores.clear();
722         
723                 //calculate frequency
724                 for (int f = 0; f < numComp; f++) {
725                         for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
726                                 validScores[rScores[f][i]] = rScores[f][i];
727                                 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
728                                 if (it != rScoreFreq[f].end()) {
729                                         rScoreFreq[f][rScores[f][i]]++;
730                                 }else{
731                                         rScoreFreq[f][rScores[f][i]] = 1;
732                                 }
733                         }
734                 }
735                 
736                 //calculate rcumul
737                 for(int a = 0; a < numComp; a++) {
738                         float rcumul = 1.0000;
739                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
740                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
741                                 //make rscoreFreq map and rCumul
742                                 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
743                                 rCumul[a][it->first] = rcumul;
744                                 //get percentage of random trees with that info
745                                 if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
746                                 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
747                         }
748                 }
749
750         }
751         catch(exception& e) {
752                 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
753                 exit(1);
754         }
755 }
756 /*****************************************************************/
757 int UnifracWeightedCommand::readNamesFile() {
758         try {
759                 m->names.clear();
760                 numUniquesInName = 0;
761                 
762                 ifstream in;
763                 m->openInputFile(namefile, in);
764                 
765                 string first, second;
766                 map<string, string>::iterator itNames;
767                 
768                 while(!in.eof()) {
769                         in >> first >> second; m->gobble(in);
770                         
771                         numUniquesInName++;
772                         
773                         itNames = m->names.find(first);
774                         if (itNames == m->names.end()) {  
775                                 m->names[first] = second; 
776                                 
777                                 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
778                                 vector<string> dupNames;
779                                 m->splitAtComma(second, dupNames);
780                                 
781                                 for (int i = 0; i < dupNames.size(); i++) {     
782                                         nameMap[dupNames[i]] = dupNames[i]; 
783                                         if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); } 
784                                 }
785                         }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }                  
786                 }
787                 in.close();
788                 
789                 return 0;
790         }
791         catch(exception& e) {
792                 m->errorOut(e, "UnifracWeightedCommand", "readNamesFile");
793                 exit(1);
794         }
795 }
796 /***********************************************************/
797
798
799
800
801