]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
added getCommandInfoCommand for gui
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> UnifracWeightedCommand::setParameters(){ 
14         try {
15                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
16                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
17                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
18                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
19                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
20                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21                 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
22                 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
23                 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string UnifracWeightedCommand::getHelpString(){ 
38         try {
39                 string helpString = "";
40                 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root and random.  tree parameter is required unless you have valid current tree file.\n";
41                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
42                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
43                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
44                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
45                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
46                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
47                 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
48                 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
49                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
50                 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
51                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
52                 return helpString;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 UnifracWeightedCommand::UnifracWeightedCommand(){       
61         try {
62                 abort = true; calledHelp = true; 
63                 setParameters();
64                 vector<string> tempOutNames;
65                 outputTypes["weighted"] = tempOutNames;
66                 outputTypes["wsummary"] = tempOutNames;
67                 outputTypes["phylip"] = tempOutNames;
68                 outputTypes["column"] = tempOutNames;
69         }
70         catch(exception& e) {
71                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
72                 exit(1);
73         }
74 }
75
76 /***********************************************************/
77 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
78         try {
79                 abort = false; calledHelp = false;   
80                         
81                 //allow user to run help
82                 if(option == "help") { help(); abort = true; calledHelp = true; }
83                 
84                 else {
85                         vector<string> myArray = setParameters();
86                         
87                         OptionParser parser(option);
88                         map<string,string> parameters=parser.getParameters();
89                         map<string,string>::iterator it;
90                         
91                         ValidParameters validParameter;
92                 
93                         //check to make sure all parameters are valid for command
94                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
95                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
96                         }
97                         
98                         //initialize outputTypes
99                         vector<string> tempOutNames;
100                         outputTypes["weighted"] = tempOutNames;
101                         outputTypes["wsummary"] = tempOutNames;
102                         outputTypes["phylip"] = tempOutNames;
103                         outputTypes["column"] = tempOutNames;
104                         
105                         //if the user changes the input directory command factory will send this info to us in the output parameter 
106                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
107                         if (inputDir == "not found"){   inputDir = "";          }
108                         else {
109                                 string path;
110                                 it = parameters.find("tree");
111                                 //user has given a template file
112                                 if(it != parameters.end()){ 
113                                         path = m->hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
116                                 }
117                                 
118                                 it = parameters.find("group");
119                                 //user has given a template file
120                                 if(it != parameters.end()){ 
121                                         path = m->hasPath(it->second);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
124                                 }
125                                 
126                                 it = parameters.find("name");
127                                 //user has given a template file
128                                 if(it != parameters.end()){ 
129                                         path = m->hasPath(it->second);
130                                         //if the user has not given a path then, add inputdir. else leave path alone.
131                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
132                                 }
133                         }
134                         
135                         m->runParse = true;
136                         
137                         //check for required parameters
138                         treefile = validParameter.validFile(parameters, "tree", true);
139                         if (treefile == "not open") { abort = true; }
140                         else if (treefile == "not found") {                             //if there is a current design file, use it
141                                 treefile = m->getTreeFile(); 
142                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
143                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
144                         }       
145                         
146                         //check for required parameters
147                         groupfile = validParameter.validFile(parameters, "group", true);
148                         if (groupfile == "not open") { abort = true; }
149                         else if (groupfile == "not found") { groupfile = ""; }
150                         
151                         namefile = validParameter.validFile(parameters, "name", true);
152                         if (namefile == "not open") { abort = true; }
153                         else if (namefile == "not found") { namefile = ""; }
154                         
155                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
156                         
157                                                                                                                                         
158                         //check for optional parameter and set defaults
159                         // ...at some point should added some additional type checking...
160                         groups = validParameter.validFile(parameters, "groups", false);                 
161                         if (groups == "not found") { groups = ""; }
162                         else { 
163                                 m->splitAtDash(groups, Groups);
164                                 m->Groups = Groups;
165                         }
166                                 
167                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
168                         convert(itersString, iters); 
169                         
170                         string temp = validParameter.validFile(parameters, "distance", false);                  
171                         if (temp == "not found") { phylip = false; outputForm = ""; }
172                         else{
173                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
174                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
175                         }
176                         
177                         temp = validParameter.validFile(parameters, "random", false);                           if (temp == "not found") { temp = "F"; }
178                         random = m->isTrue(temp);
179                         
180                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
181                         includeRoot = m->isTrue(temp);
182                         
183                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
184                         m->setProcessors(temp);
185                         convert(temp, processors);
186                         
187                         if (!random) {  iters = 0;  } //turn off random calcs
188                 }
189                 
190                 
191         }
192         catch(exception& e) {
193                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
194                 exit(1);
195         }
196 }
197 /***********************************************************/
198 int UnifracWeightedCommand::execute() {
199         try {
200         
201                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
202                 
203                 if (groupfile != "") {
204                         //read in group map info.
205                         tmap = new TreeMap(groupfile);
206                         tmap->readMap();
207                 }else{ //fake out by putting everyone in one group
208                         Tree* tree = new Tree(treefile); delete tree;  //extracts names from tree to make faked out groupmap
209                         tmap = new TreeMap();
210                         
211                         for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
212                 }
213                 
214                 if (namefile != "") { readNamesFile(); }
215                 
216                 read = new ReadNewickTree(treefile);
217                 int readOk = read->read(tmap); 
218                 
219                 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
220                 
221                 read->AssembleTrees();
222                 T = read->getTrees();
223                 delete read;
224                 
225                 //make sure all files match
226                 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
227                 int numNamesInTree;
228                 if (namefile != "")  {  
229                         if (numUniquesInName == m->Treenames.size()) {  numNamesInTree = nameMap.size();  }
230                         else {   numNamesInTree = m->Treenames.size();  }
231                 }else {  numNamesInTree = m->Treenames.size();  }
232                 
233                 
234                 //output any names that are in group file but not in tree
235                 if (numNamesInTree < tmap->getNumSeqs()) {
236                         for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
237                                 //is that name in the tree?
238                                 int count = 0;
239                                 for (int j = 0; j < m->Treenames.size(); j++) {
240                                         if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
241                                         count++;
242                                 }
243                                 
244                                 if (m->control_pressed) { 
245                                         delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
246                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
247                                         m->Groups.clear();
248                                         return 0;
249                                 }
250                                 
251                                 //then you did not find it so report it 
252                                 if (count == m->Treenames.size()) { 
253                                         //if it is in your namefile then don't remove
254                                         map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
255                                         
256                                         if (it == nameMap.end()) {
257                                                 m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
258                                                 tmap->removeSeq(tmap->namesOfSeqs[i]);
259                                                 i--; //need this because removeSeq removes name from namesOfSeqs
260                                         }
261                                 }
262                         }
263                 }
264                 
265                 sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
266                 m->openOutputFile(sumFile, outSum);
267                 outputNames.push_back(sumFile);  outputTypes["wsummary"].push_back(sumFile);
268                         
269                 util = new SharedUtil();
270                 string s; //to make work with setgroups
271                 util->setGroups(m->Groups, tmap->namesOfGroups, s, numGroups, "weighted");      //sets the groups the user wants to analyze
272                 util->getCombos(groupComb, m->Groups, numComp);
273                 delete util;
274                 
275                 weighted = new Weighted(tmap, includeRoot);
276                         
277                 int start = time(NULL);
278                 
279                 //get weighted for users tree
280                 userData.resize(numComp,0);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
281                 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
282                 
283                 if (numComp < processors) { processors = numComp; }
284                                 
285                 //get weighted scores for users trees
286                 for (int i = 0; i < T.size(); i++) {
287                         
288                         if (m->control_pressed) { delete tmap; delete weighted;
289                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      remove(outputNames[i].c_str());  } return 0; }
290
291                         counter = 0;
292                         rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
293                         uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
294                         
295                         if (random) {  
296                                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted", itersString);  
297                                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
298                                 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
299                         } 
300
301                         userData = weighted->getValues(T[i], processors, outputDir);  //userData[0] = weightedscore
302                         
303                         if (m->control_pressed) { delete tmap; delete weighted;
304                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());  } return 0; }
305                         
306                         //save users score
307                         for (int s=0; s<numComp; s++) {
308                                 //add users score to vector of user scores
309                                 uScores[s].push_back(userData[s]);
310                                 
311                                 //save users tree score for summary file
312                                 utreeScores.push_back(userData[s]);
313                         }
314                         
315                         if (random) { 
316                         
317                                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
318                                 vector< vector<string> > namesOfGroupCombos;
319                                 for (int a=0; a<numGroups; a++) { 
320                                         for (int l = 0; l < a; l++) {   
321                                                 vector<string> groups; groups.push_back(m->Groups[a]); groups.push_back(m->Groups[l]);
322                                                 namesOfGroupCombos.push_back(groups);
323                                         }
324                                 }
325                                 
326                                 lines.clear();
327                                 
328                                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
329                                         if(processors != 1){
330                                                 int numPairs = namesOfGroupCombos.size();
331                                                 int numPairsPerProcessor = numPairs / processors;
332                                         
333                                                 for (int i = 0; i < processors; i++) {
334                                                         int startPos = i * numPairsPerProcessor;
335                                                         if(i == processors - 1){
336                                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
337                                                         }
338                                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
339                                                 }
340                                         }
341                                 #endif
342
343                                 
344                                 //get scores for random trees
345                                 for (int j = 0; j < iters; j++) {
346                                 
347                                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
348                                                 if(processors == 1){
349                                                         driver(T[i],  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
350                                                 }else{
351                                                         createProcesses(T[i],  namesOfGroupCombos, rScores);
352                                                 }
353                                         #else
354                                                 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
355                                         #endif
356                                         
357                                         if (m->control_pressed) { delete tmap; delete weighted;
358                                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());  } return 0; }
359                                         
360                                         //report progress
361 //                                      m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();          
362                                 }
363                                 lines.clear();
364                         
365                                 //find the signifigance of the score for summary file
366                                 for (int f = 0; f < numComp; f++) {
367                                         //sort random scores
368                                         sort(rScores[f].begin(), rScores[f].end());
369                                         
370                                         //the index of the score higher than yours is returned 
371                                         //so if you have 1000 random trees the index returned is 100 
372                                         //then there are 900 trees with a score greater then you. 
373                                         //giving you a signifigance of 0.900
374                                         int index = findIndex(userData[f], f);    if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
375                                         
376                                         //the signifigance is the number of trees with the users score or higher 
377                                         WScoreSig.push_back((iters-index)/(float)iters);
378                                 }
379                                 
380                                 //out << "Tree# " << i << endl;
381                                 calculateFreqsCumuls();
382                                 printWeightedFile();
383                                 
384                                 delete output;
385                         
386                         }
387                         
388                         //clear data
389                         rScores.clear();
390                         uScores.clear();
391                         validScores.clear();
392                 }
393                 
394                 
395                 if (m->control_pressed) { delete tmap; delete weighted;
396                         for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      remove(outputNames[i].c_str());  } return 0;  }
397                 
398                 printWSummaryFile();
399                 
400                 if (phylip) {   createPhylipFile();             }
401
402                 //clear out users groups
403                 m->Groups.clear();
404                 delete tmap; delete weighted;
405                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
406                 
407                 
408                 if (m->control_pressed) { 
409                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  }
410                         return 0; 
411                 }
412                 
413                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
414                 
415                 //set phylip file as new current phylipfile
416                 string current = "";
417                 itTypes = outputTypes.find("phylip");
418                 if (itTypes != outputTypes.end()) {
419                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
420                 }
421                 
422                 //set column file as new current columnfile
423                 itTypes = outputTypes.find("column");
424                 if (itTypes != outputTypes.end()) {
425                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
426                 }
427                 
428                 m->mothurOutEndLine();
429                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
430                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
431                 m->mothurOutEndLine();
432                 
433                 return 0;
434                 
435         }
436         catch(exception& e) {
437                 m->errorOut(e, "UnifracWeightedCommand", "execute");
438                 exit(1);
439         }
440 }
441 /**************************************************************************************************/
442
443 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
444         try {
445 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
446                 int process = 1;
447                 vector<int> processIDS;
448                 
449                 EstOutput results;
450                 
451                 //loop through and create all the processes you want
452                 while (process != processors) {
453                         int pid = fork();
454                         
455                         if (pid > 0) {
456                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
457                                 process++;
458                         }else if (pid == 0){
459                                 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
460                         
461                                 //pass numSeqs to parent
462                                 ofstream out;
463                                 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
464                                 m->openOutputFile(tempFile, out);
465                                 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t';  } out << endl;
466                                 out.close();
467                                 
468                                 exit(0);
469                         }else { 
470                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
471                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
472                                 exit(0);
473                         }
474                 }
475                 
476                 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
477                 
478                 //force parent to wait until all the processes are done
479                 for (int i=0;i<(processors-1);i++) { 
480                         int temp = processIDS[i];
481                         wait(&temp);
482                 }
483                 
484                 //get data created by processes
485                 for (int i=0;i<(processors-1);i++) { 
486         
487                         ifstream in;
488                         string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
489                         m->openInputFile(s, in);
490                         
491                         double tempScore;
492                         for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
493                         in.close();
494                         remove(s.c_str());
495                 }
496                 
497                 return 0;
498 #endif          
499         }
500         catch(exception& e) {
501                 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
502                 exit(1);
503         }
504 }
505
506 /**************************************************************************************************/
507 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) { 
508  try {
509                 Tree* randT = new Tree(tmap);
510
511                 for (int h = start; h < (start+num); h++) {
512         
513                         if (m->control_pressed) { return 0; }
514                 
515                         //initialize weighted score
516                         string groupA = namesOfGroupCombos[h][0]; 
517                         string groupB = namesOfGroupCombos[h][1];
518                         
519                         //copy T[i]'s info.
520                         randT->getCopy(t);
521                          
522                         //create a random tree with same topology as T[i], but different labels
523                         randT->assembleRandomUnifracTree(groupA, groupB);
524                         
525                         if (m->control_pressed) { delete randT;  return 0;  }
526
527                         //get wscore of random tree
528                         EstOutput randomData = weighted->getValues(randT, groupA, groupB);
529                 
530                         if (m->control_pressed) { delete randT;  return 0;  }
531                                                                                 
532                         //save scores
533                         scores[h].push_back(randomData[0]);
534                 }
535         
536                 delete randT;
537         
538                 return 0;
539
540         }
541         catch(exception& e) {
542                 m->errorOut(e, "UnifracWeightedCommand", "driver");
543                 exit(1);
544         }
545 }
546 /***********************************************************/
547 void UnifracWeightedCommand::printWeightedFile() {
548         try {
549                 vector<double> data;
550                 vector<string> tags;
551                 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
552                 
553                 for(int a = 0; a < numComp; a++) {
554                         output->initFile(groupComb[a], tags);
555                         //print each line
556                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
557                                 data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
558                                 output->output(data);
559                                 data.clear();
560                         } 
561                         output->resetFile();
562                 }
563         }
564         catch(exception& e) {
565                 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
566                 exit(1);
567         }
568 }
569
570
571 /***********************************************************/
572 void UnifracWeightedCommand::printWSummaryFile() {
573         try {
574                 //column headers
575                 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t' << "WSig" <<  endl;
576                 m->mothurOut("Tree#\tGroups\tWScore\tWSig"); m->mothurOutEndLine(); 
577                 
578                 //format output
579                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
580                 
581                 //print each line
582                 int count = 0;
583                 for (int i = 0; i < T.size(); i++) { 
584                         for (int j = 0; j < numComp; j++) {
585                                 if (random) {
586                                         if (WScoreSig[count] > (1/(float)iters)) {
587                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
588                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
589                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" +  toString(WScoreSig[count]) + "\n");   
590                                         }else{
591                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
592                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
593                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" +  toString((1/float(iters))) + "\n");  
594                                         }
595                                 }else{
596                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << "0.00" << endl; 
597                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << "0.00" << endl; 
598                                         m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t0.00\n"); 
599                                 }
600                                 count++;
601                         }
602                 }
603                 outSum.close();
604         }
605         catch(exception& e) {
606                 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
607                 exit(1);
608         }
609 }
610 /***********************************************************/
611 void UnifracWeightedCommand::createPhylipFile() {
612         try {
613                 int count = 0;
614                 //for each tree
615                 for (int i = 0; i < T.size(); i++) { 
616                 
617                         string phylipFileName;
618                         if ((outputForm == "lt") || (outputForm == "square")) {
619                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.phylip.dist";
620                                 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
621                         }else { //column
622                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.column.dist";
623                                 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
624                         }
625                         
626                         ofstream out;
627                         m->openOutputFile(phylipFileName, out);
628                         
629                         if ((outputForm == "lt") || (outputForm == "square")) {
630                                 //output numSeqs
631                                 out << m->Groups.size() << endl;
632                         }
633
634                         //make matrix with scores in it
635                         vector< vector<float> > dists;  dists.resize(m->Groups.size());
636                         for (int i = 0; i < m->Groups.size(); i++) {
637                                 dists[i].resize(m->Groups.size(), 0.0);
638                         }
639                         
640                         //flip it so you can print it
641                         for (int r=0; r<m->Groups.size(); r++) { 
642                                 for (int l = 0; l < r; l++) {
643                                         dists[r][l] = utreeScores[count];
644                                         dists[l][r] = utreeScores[count];
645                                         count++;
646                                 }
647                         }
648
649                         //output to file
650                         for (int r=0; r<m->Groups.size(); r++) { 
651                                 //output name
652                                 string name = m->Groups[r];
653                                 if (name.length() < 10) { //pad with spaces to make compatible
654                                         while (name.length() < 10) {  name += " ";  }
655                                 }
656                                 
657                                 if (outputForm == "lt") {
658                                         out << name << '\t';
659                                         
660                                         //output distances
661                                         for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
662                                         out << endl;
663                                 }else if (outputForm == "square") {
664                                         out << name << '\t';
665                                         
666                                         //output distances
667                                         for (int l = 0; l < m->Groups.size(); l++) {    out  << dists[r][l] << '\t';  }
668                                         out << endl;
669                                 }else{
670                                         //output distances
671                                         for (int l = 0; l < r; l++) {   
672                                                 string otherName = m->Groups[l];
673                                                 if (otherName.length() < 10) { //pad with spaces to make compatible
674                                                         while (otherName.length() < 10) {  otherName += " ";  }
675                                                 }
676                                                 
677                                                 out  << name << '\t' << otherName << dists[r][l] << endl;  
678                                         }
679                                 }
680                         }
681                         out.close();
682                 }
683         }
684         catch(exception& e) {
685                 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
686                 exit(1);
687         }
688 }
689 /***********************************************************/
690 int UnifracWeightedCommand::findIndex(float score, int index) {
691         try{
692                 for (int i = 0; i < rScores[index].size(); i++) {
693                         if (rScores[index][i] >= score) {       return i;       }
694                 }
695                 return rScores[index].size();
696         }
697         catch(exception& e) {
698                 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
699                 exit(1);
700         }
701 }
702
703 /***********************************************************/
704
705 void UnifracWeightedCommand::calculateFreqsCumuls() {
706         try {
707                 //clear out old tree values
708                 rScoreFreq.clear();
709                 rScoreFreq.resize(numComp);
710                 rCumul.clear();
711                 rCumul.resize(numComp);
712                 validScores.clear();
713         
714                 //calculate frequency
715                 for (int f = 0; f < numComp; f++) {
716                         for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
717                                 validScores[rScores[f][i]] = rScores[f][i];
718                                 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
719                                 if (it != rScoreFreq[f].end()) {
720                                         rScoreFreq[f][rScores[f][i]]++;
721                                 }else{
722                                         rScoreFreq[f][rScores[f][i]] = 1;
723                                 }
724                         }
725                 }
726                 
727                 //calculate rcumul
728                 for(int a = 0; a < numComp; a++) {
729                         float rcumul = 1.0000;
730                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
731                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
732                                 //make rscoreFreq map and rCumul
733                                 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
734                                 rCumul[a][it->first] = rcumul;
735                                 //get percentage of random trees with that info
736                                 if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
737                                 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
738                         }
739                 }
740
741         }
742         catch(exception& e) {
743                 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
744                 exit(1);
745         }
746 }
747 /*****************************************************************/
748 int UnifracWeightedCommand::readNamesFile() {
749         try {
750                 m->names.clear();
751                 numUniquesInName = 0;
752                 
753                 ifstream in;
754                 m->openInputFile(namefile, in);
755                 
756                 string first, second;
757                 map<string, string>::iterator itNames;
758                 
759                 while(!in.eof()) {
760                         in >> first >> second; m->gobble(in);
761                         
762                         numUniquesInName++;
763                         
764                         itNames = m->names.find(first);
765                         if (itNames == m->names.end()) {  
766                                 m->names[first] = second; 
767                                 
768                                 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
769                                 vector<string> dupNames;
770                                 m->splitAtComma(second, dupNames);
771                                 
772                                 for (int i = 0; i < dupNames.size(); i++) {     
773                                         nameMap[dupNames[i]] = dupNames[i]; 
774                                         if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); } 
775                                 }
776                         }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }                  
777                 }
778                 in.close();
779                 
780                 return 0;
781         }
782         catch(exception& e) {
783                 m->errorOut(e, "UnifracWeightedCommand", "readNamesFile");
784                 exit(1);
785         }
786 }
787 /***********************************************************/
788
789
790
791
792