]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
added multiple processors option for Windows users to align.seqs, dist.seqs, summary...
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> UnifracWeightedCommand::setParameters(){ 
14         try {
15                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
16                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
17                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
18                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
19                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
20                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21                 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
22                 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
23                 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string UnifracWeightedCommand::getHelpString(){ 
38         try {
39                 string helpString = "";
40                 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root and random.  tree parameter is required unless you have valid current tree file.\n";
41                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
42                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
43                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
44                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
45                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
46                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
47                 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
48                 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
49                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
50                 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
51                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
52                 return helpString;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 UnifracWeightedCommand::UnifracWeightedCommand(){       
61         try {
62                 abort = true; calledHelp = true; 
63                 setParameters();
64                 vector<string> tempOutNames;
65                 outputTypes["weighted"] = tempOutNames;
66                 outputTypes["wsummary"] = tempOutNames;
67                 outputTypes["phylip"] = tempOutNames;
68                 outputTypes["column"] = tempOutNames;
69         }
70         catch(exception& e) {
71                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
72                 exit(1);
73         }
74 }
75
76 /***********************************************************/
77 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
78         try {
79                 abort = false; calledHelp = false;   
80                         
81                 //allow user to run help
82                 if(option == "help") { help(); abort = true; calledHelp = true; }
83                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
84                 
85                 else {
86                         vector<string> myArray = setParameters();
87                         
88                         OptionParser parser(option);
89                         map<string,string> parameters=parser.getParameters();
90                         map<string,string>::iterator it;
91                         
92                         ValidParameters validParameter;
93                 
94                         //check to make sure all parameters are valid for command
95                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
96                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
97                         }
98                         
99                         //initialize outputTypes
100                         vector<string> tempOutNames;
101                         outputTypes["weighted"] = tempOutNames;
102                         outputTypes["wsummary"] = tempOutNames;
103                         outputTypes["phylip"] = tempOutNames;
104                         outputTypes["column"] = tempOutNames;
105                         
106                         //if the user changes the input directory command factory will send this info to us in the output parameter 
107                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
108                         if (inputDir == "not found"){   inputDir = "";          }
109                         else {
110                                 string path;
111                                 it = parameters.find("tree");
112                                 //user has given a template file
113                                 if(it != parameters.end()){ 
114                                         path = m->hasPath(it->second);
115                                         //if the user has not given a path then, add inputdir. else leave path alone.
116                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
117                                 }
118                                 
119                                 it = parameters.find("group");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
125                                 }
126                                 
127                                 it = parameters.find("name");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
133                                 }
134                         }
135                         
136                         m->runParse = true;
137                         m->clearGroups();
138                         m->clearAllGroups();
139                         m->Treenames.clear();
140                         m->names.clear();
141                         
142                         //check for required parameters
143                         treefile = validParameter.validFile(parameters, "tree", true);
144                         if (treefile == "not open") { abort = true; }
145                         else if (treefile == "not found") {                             //if there is a current design file, use it
146                                 treefile = m->getTreeFile(); 
147                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
148                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
149                         }else { m->setTreeFile(treefile); }     
150                         
151                         //check for required parameters
152                         groupfile = validParameter.validFile(parameters, "group", true);
153                         if (groupfile == "not open") { abort = true; }
154                         else if (groupfile == "not found") { groupfile = ""; }
155                         else { m->setGroupFile(groupfile); }
156                         
157                         namefile = validParameter.validFile(parameters, "name", true);
158                         if (namefile == "not open") { abort = true; }
159                         else if (namefile == "not found") { namefile = ""; }
160                         else { m->setNameFile(namefile); }
161                         
162                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
163                         
164                                                                                                                                         
165                         //check for optional parameter and set defaults
166                         // ...at some point should added some additional type checking...
167                         groups = validParameter.validFile(parameters, "groups", false);                 
168                         if (groups == "not found") { groups = ""; }
169                         else { 
170                                 m->splitAtDash(groups, Groups);
171                                 m->setGroups(Groups);
172                         }
173                                 
174                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
175                         convert(itersString, iters); 
176                         
177                         string temp = validParameter.validFile(parameters, "distance", false);                  
178                         if (temp == "not found") { phylip = false; outputForm = ""; }
179                         else{
180                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
181                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
182                         }
183                         
184                         temp = validParameter.validFile(parameters, "random", false);                           if (temp == "not found") { temp = "F"; }
185                         random = m->isTrue(temp);
186                         
187                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
188                         includeRoot = m->isTrue(temp);
189                         
190                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
191                         m->setProcessors(temp);
192                         convert(temp, processors);
193                         
194                         if (!random) {  iters = 0;  } //turn off random calcs
195                 }
196                 
197                 
198         }
199         catch(exception& e) {
200                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
201                 exit(1);
202         }
203 }
204 /***********************************************************/
205 int UnifracWeightedCommand::execute() {
206         try {
207         
208                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
209                 
210                 m->setTreeFile(treefile);
211                 
212                 if (groupfile != "") {
213                         //read in group map info.
214                         tmap = new TreeMap(groupfile);
215                         tmap->readMap();
216                 }else{ //fake out by putting everyone in one group
217                         Tree* tree = new Tree(treefile); delete tree;  //extracts names from tree to make faked out groupmap
218                         tmap = new TreeMap();
219                         
220                         for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
221                 }
222                 
223                 if (namefile != "") { readNamesFile(); }
224                 
225                 read = new ReadNewickTree(treefile);
226                 int readOk = read->read(tmap); 
227                 
228                 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
229                 
230                 read->AssembleTrees();
231                 T = read->getTrees();
232                 delete read;
233                 
234                 //make sure all files match
235                 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
236                 int numNamesInTree;
237                 if (namefile != "")  {  
238                         if (numUniquesInName == m->Treenames.size()) {  numNamesInTree = nameMap.size();  }
239                         else {   numNamesInTree = m->Treenames.size();  }
240                 }else {  numNamesInTree = m->Treenames.size();  }
241                 
242                 
243                 //output any names that are in group file but not in tree
244                 if (numNamesInTree < tmap->getNumSeqs()) {
245                         for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
246                                 //is that name in the tree?
247                                 int count = 0;
248                                 for (int j = 0; j < m->Treenames.size(); j++) {
249                                         if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
250                                         count++;
251                                 }
252                                 
253                                 if (m->control_pressed) { 
254                                         delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
255                                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
256                                         m->clearGroups();
257                                         return 0;
258                                 }
259                                 
260                                 //then you did not find it so report it 
261                                 if (count == m->Treenames.size()) { 
262                                         //if it is in your namefile then don't remove
263                                         map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
264                                         
265                                         if (it == nameMap.end()) {
266                                                 m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
267                                                 tmap->removeSeq(tmap->namesOfSeqs[i]);
268                                                 i--; //need this because removeSeq removes name from namesOfSeqs
269                                         }
270                                 }
271                         }
272                 }
273                 
274                 sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
275                 m->openOutputFile(sumFile, outSum);
276                 outputNames.push_back(sumFile);  outputTypes["wsummary"].push_back(sumFile);
277                         
278                 util = new SharedUtil();
279                 string s; //to make work with setgroups
280                 vector<string> Groups = m->getGroups();
281                 vector<string> nameGroups = tmap->getNamesOfGroups();
282                 util->setGroups(Groups, nameGroups, s, numGroups, "weighted");  //sets the groups the user wants to analyze
283                 util->getCombos(groupComb, Groups, numComp);
284                 delete util;
285                 
286                 weighted = new Weighted(tmap, includeRoot);
287                         
288                 int start = time(NULL);
289                 
290                 //get weighted for users tree
291                 userData.resize(numComp,0);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
292                 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
293                 
294                 if (numComp < processors) { processors = numComp; }
295                                 
296                 //get weighted scores for users trees
297                 for (int i = 0; i < T.size(); i++) {
298                         
299                         if (m->control_pressed) { delete tmap; delete weighted;
300                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0; }
301
302                         counter = 0;
303                         rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
304                         uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
305                         
306                         if (random) {  
307                                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted", itersString);  
308                                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
309                                 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
310                         } 
311
312                         userData = weighted->getValues(T[i], processors, outputDir);  //userData[0] = weightedscore
313                         
314                         if (m->control_pressed) { delete tmap; delete weighted;
315                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } return 0; }
316                         
317                         //save users score
318                         for (int s=0; s<numComp; s++) {
319                                 //add users score to vector of user scores
320                                 uScores[s].push_back(userData[s]);
321                                 
322                                 //save users tree score for summary file
323                                 utreeScores.push_back(userData[s]);
324                         }
325                         
326                         if (random) { 
327                         
328                                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
329                                 vector< vector<string> > namesOfGroupCombos;
330                                 for (int a=0; a<numGroups; a++) { 
331                                         for (int l = 0; l < a; l++) {   
332                                                 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
333                                                 namesOfGroupCombos.push_back(groups);
334                                         }
335                                 }
336                                 
337                                 lines.clear();
338                                 
339                                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
340                                         if(processors != 1){
341                                                 int numPairs = namesOfGroupCombos.size();
342                                                 int numPairsPerProcessor = numPairs / processors;
343                                         
344                                                 for (int i = 0; i < processors; i++) {
345                                                         int startPos = i * numPairsPerProcessor;
346                                                         if(i == processors - 1){
347                                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
348                                                         }
349                                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
350                                                 }
351                                         }
352                                 #endif
353
354                                 
355                                 //get scores for random trees
356                                 for (int j = 0; j < iters; j++) {
357                                 
358                                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
359                                                 if(processors == 1){
360                                                         driver(T[i],  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
361                                                 }else{
362                                                         createProcesses(T[i],  namesOfGroupCombos, rScores);
363                                                 }
364                                         #else
365                                                 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
366                                         #endif
367                                         
368                                         if (m->control_pressed) { delete tmap; delete weighted;
369                                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } return 0; }
370                                         
371                                         //report progress
372 //                                      m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();          
373                                 }
374                                 lines.clear();
375                         
376                                 //find the signifigance of the score for summary file
377                                 for (int f = 0; f < numComp; f++) {
378                                         //sort random scores
379                                         sort(rScores[f].begin(), rScores[f].end());
380                                         
381                                         //the index of the score higher than yours is returned 
382                                         //so if you have 1000 random trees the index returned is 100 
383                                         //then there are 900 trees with a score greater then you. 
384                                         //giving you a signifigance of 0.900
385                                         int index = findIndex(userData[f], f);    if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
386                                         
387                                         //the signifigance is the number of trees with the users score or higher 
388                                         WScoreSig.push_back((iters-index)/(float)iters);
389                                 }
390                                 
391                                 //out << "Tree# " << i << endl;
392                                 calculateFreqsCumuls();
393                                 printWeightedFile();
394                                 
395                                 delete output;
396                         
397                         }
398                         
399                         //clear data
400                         rScores.clear();
401                         uScores.clear();
402                         validScores.clear();
403                 }
404                 
405                 
406                 if (m->control_pressed) { delete tmap; delete weighted;
407                         for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0;  }
408                 
409                 printWSummaryFile();
410                 
411                 if (phylip) {   createPhylipFile();             }
412
413                 //clear out users groups
414                 m->clearGroups();
415                 delete tmap; delete weighted;
416                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
417                 
418                 
419                 if (m->control_pressed) { 
420                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  }
421                         return 0; 
422                 }
423                 
424                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
425                 
426                 //set phylip file as new current phylipfile
427                 string current = "";
428                 itTypes = outputTypes.find("phylip");
429                 if (itTypes != outputTypes.end()) {
430                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
431                 }
432                 
433                 //set column file as new current columnfile
434                 itTypes = outputTypes.find("column");
435                 if (itTypes != outputTypes.end()) {
436                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
437                 }
438                 
439                 m->mothurOutEndLine();
440                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
441                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
442                 m->mothurOutEndLine();
443                 
444                 return 0;
445                 
446         }
447         catch(exception& e) {
448                 m->errorOut(e, "UnifracWeightedCommand", "execute");
449                 exit(1);
450         }
451 }
452 /**************************************************************************************************/
453
454 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
455         try {
456 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
457                 int process = 1;
458                 vector<int> processIDS;
459                 
460                 EstOutput results;
461                 
462                 //loop through and create all the processes you want
463                 while (process != processors) {
464                         int pid = fork();
465                         
466                         if (pid > 0) {
467                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
468                                 process++;
469                         }else if (pid == 0){
470                                 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
471                         
472                                 //pass numSeqs to parent
473                                 ofstream out;
474                                 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
475                                 m->openOutputFile(tempFile, out);
476                                 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t';  } out << endl;
477                                 out.close();
478                                 
479                                 exit(0);
480                         }else { 
481                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
482                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
483                                 exit(0);
484                         }
485                 }
486                 
487                 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
488                 
489                 //force parent to wait until all the processes are done
490                 for (int i=0;i<(processors-1);i++) { 
491                         int temp = processIDS[i];
492                         wait(&temp);
493                 }
494                 
495                 //get data created by processes
496                 for (int i=0;i<(processors-1);i++) { 
497         
498                         ifstream in;
499                         string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
500                         m->openInputFile(s, in);
501                         
502                         double tempScore;
503                         for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
504                         in.close();
505                         m->mothurRemove(s);
506                 }
507                 
508                 return 0;
509 #endif          
510         }
511         catch(exception& e) {
512                 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
513                 exit(1);
514         }
515 }
516
517 /**************************************************************************************************/
518 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) { 
519  try {
520                 Tree* randT = new Tree(tmap);
521
522                 for (int h = start; h < (start+num); h++) {
523         
524                         if (m->control_pressed) { return 0; }
525                 
526                         //initialize weighted score
527                         string groupA = namesOfGroupCombos[h][0]; 
528                         string groupB = namesOfGroupCombos[h][1];
529                         
530                         //copy T[i]'s info.
531                         randT->getCopy(t);
532                          
533                         //create a random tree with same topology as T[i], but different labels
534                         randT->assembleRandomUnifracTree(groupA, groupB);
535                         
536                         if (m->control_pressed) { delete randT;  return 0;  }
537
538                         //get wscore of random tree
539                         EstOutput randomData = weighted->getValues(randT, groupA, groupB);
540                 
541                         if (m->control_pressed) { delete randT;  return 0;  }
542                                                                                 
543                         //save scores
544                         scores[h].push_back(randomData[0]);
545                 }
546         
547                 delete randT;
548         
549                 return 0;
550
551         }
552         catch(exception& e) {
553                 m->errorOut(e, "UnifracWeightedCommand", "driver");
554                 exit(1);
555         }
556 }
557 /***********************************************************/
558 void UnifracWeightedCommand::printWeightedFile() {
559         try {
560                 vector<double> data;
561                 vector<string> tags;
562                 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
563                 
564                 for(int a = 0; a < numComp; a++) {
565                         output->initFile(groupComb[a], tags);
566                         //print each line
567                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
568                                 data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
569                                 output->output(data);
570                                 data.clear();
571                         } 
572                         output->resetFile();
573                 }
574         }
575         catch(exception& e) {
576                 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
577                 exit(1);
578         }
579 }
580
581
582 /***********************************************************/
583 void UnifracWeightedCommand::printWSummaryFile() {
584         try {
585                 //column headers
586                 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
587                 m->mothurOut("Tree#\tGroups\tWScore\t");
588                 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
589                 outSum << endl; m->mothurOutEndLine();
590                 
591                 //format output
592                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
593                 
594                 //print each line
595                 int count = 0;
596                 for (int i = 0; i < T.size(); i++) { 
597                         for (int j = 0; j < numComp; j++) {
598                                 if (random) {
599                                         if (WScoreSig[count] > (1/(float)iters)) {
600                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
601                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
602                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" +  toString(WScoreSig[count]) + "\n");   
603                                         }else{
604                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
605                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
606                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" +  toString((1/float(iters))) + "\n");  
607                                         }
608                                 }else{
609                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl; 
610                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count]  << endl; 
611                                         m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n"); 
612                                 }
613                                 count++;
614                         }
615                 }
616                 outSum.close();
617         }
618         catch(exception& e) {
619                 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
620                 exit(1);
621         }
622 }
623 /***********************************************************/
624 void UnifracWeightedCommand::createPhylipFile() {
625         try {
626                 int count = 0;
627                 //for each tree
628                 for (int i = 0; i < T.size(); i++) { 
629                 
630                         string phylipFileName;
631                         if ((outputForm == "lt") || (outputForm == "square")) {
632                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.phylip.dist";
633                                 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
634                         }else { //column
635                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.column.dist";
636                                 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
637                         }
638                         
639                         ofstream out;
640                         m->openOutputFile(phylipFileName, out);
641                         
642                         if ((outputForm == "lt") || (outputForm == "square")) {
643                                 //output numSeqs
644                                 out << m->getNumGroups() << endl;
645                         }
646
647                         //make matrix with scores in it
648                         vector< vector<float> > dists;  dists.resize(m->getNumGroups());
649                         for (int i = 0; i < m->getNumGroups(); i++) {
650                                 dists[i].resize(m->getNumGroups(), 0.0);
651                         }
652                         
653                         //flip it so you can print it
654                         for (int r=0; r<m->getNumGroups(); r++) { 
655                                 for (int l = 0; l < r; l++) {
656                                         dists[r][l] = utreeScores[count];
657                                         dists[l][r] = utreeScores[count];
658                                         count++;
659                                 }
660                         }
661
662                         //output to file
663                         for (int r=0; r<m->getNumGroups(); r++) { 
664                                 //output name
665                                 string name = (m->getGroups())[r];
666                                 if (name.length() < 10) { //pad with spaces to make compatible
667                                         while (name.length() < 10) {  name += " ";  }
668                                 }
669                                 
670                                 if (outputForm == "lt") {
671                                         out << name << '\t';
672                                         
673                                         //output distances
674                                         for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
675                                         out << endl;
676                                 }else if (outputForm == "square") {
677                                         out << name << '\t';
678                                         
679                                         //output distances
680                                         for (int l = 0; l < m->getNumGroups(); l++) {   out  << dists[r][l] << '\t';  }
681                                         out << endl;
682                                 }else{
683                                         //output distances
684                                         for (int l = 0; l < r; l++) {   
685                                                 string otherName = (m->getGroups())[l];
686                                                 if (otherName.length() < 10) { //pad with spaces to make compatible
687                                                         while (otherName.length() < 10) {  otherName += " ";  }
688                                                 }
689                                                 
690                                                 out  << name << '\t' << otherName << dists[r][l] << endl;  
691                                         }
692                                 }
693                         }
694                         out.close();
695                 }
696         }
697         catch(exception& e) {
698                 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
699                 exit(1);
700         }
701 }
702 /***********************************************************/
703 int UnifracWeightedCommand::findIndex(float score, int index) {
704         try{
705                 for (int i = 0; i < rScores[index].size(); i++) {
706                         if (rScores[index][i] >= score) {       return i;       }
707                 }
708                 return rScores[index].size();
709         }
710         catch(exception& e) {
711                 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
712                 exit(1);
713         }
714 }
715
716 /***********************************************************/
717
718 void UnifracWeightedCommand::calculateFreqsCumuls() {
719         try {
720                 //clear out old tree values
721                 rScoreFreq.clear();
722                 rScoreFreq.resize(numComp);
723                 rCumul.clear();
724                 rCumul.resize(numComp);
725                 validScores.clear();
726         
727                 //calculate frequency
728                 for (int f = 0; f < numComp; f++) {
729                         for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
730                                 validScores[rScores[f][i]] = rScores[f][i];
731                                 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
732                                 if (it != rScoreFreq[f].end()) {
733                                         rScoreFreq[f][rScores[f][i]]++;
734                                 }else{
735                                         rScoreFreq[f][rScores[f][i]] = 1;
736                                 }
737                         }
738                 }
739                 
740                 //calculate rcumul
741                 for(int a = 0; a < numComp; a++) {
742                         float rcumul = 1.0000;
743                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
744                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
745                                 //make rscoreFreq map and rCumul
746                                 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
747                                 rCumul[a][it->first] = rcumul;
748                                 //get percentage of random trees with that info
749                                 if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
750                                 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
751                         }
752                 }
753
754         }
755         catch(exception& e) {
756                 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
757                 exit(1);
758         }
759 }
760 /*****************************************************************/
761 int UnifracWeightedCommand::readNamesFile() {
762         try {
763                 m->names.clear();
764                 numUniquesInName = 0;
765                 
766                 ifstream in;
767                 m->openInputFile(namefile, in);
768                 
769                 string first, second;
770                 map<string, string>::iterator itNames;
771                 
772                 while(!in.eof()) {
773                         in >> first >> second; m->gobble(in);
774                         
775                         numUniquesInName++;
776                         
777                         itNames = m->names.find(first);
778                         if (itNames == m->names.end()) {  
779                                 m->names[first] = second; 
780                                 
781                                 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
782                                 vector<string> dupNames;
783                                 m->splitAtComma(second, dupNames);
784                                 
785                                 for (int i = 0; i < dupNames.size(); i++) {     
786                                         nameMap[dupNames[i]] = dupNames[i]; 
787                                         if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); } 
788                                 }
789                         }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }                  
790                 }
791                 in.close();
792                 
793                 return 0;
794         }
795         catch(exception& e) {
796                 m->errorOut(e, "UnifracWeightedCommand", "readNamesFile");
797                 exit(1);
798         }
799 }
800 /***********************************************************/
801
802
803
804
805