]> git.donarmstrong.com Git - mothur.git/blob - unweighted.cpp
fixed bug in unifrac commands with unrooted trees
[mothur.git] / unweighted.cpp
1 /*
2  *  unweighted.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unweighted.h"
11
12 /**************************************************************************************************/
13
14 EstOutput Unweighted::getValues(Tree* t, int p, string o) {
15         try {
16                 globaldata = GlobalData::getInstance();
17                 processors = p;
18                 outputDir = o;
19                         
20                 //if the users enters no groups then give them the score of all groups
21                 int numGroups = globaldata->Groups.size();
22                 
23                 //calculate number of comparsions
24                 int numComp = 0;
25                 vector< vector<string> > namesOfGroupCombos;
26                 for (int r=0; r<numGroups; r++) { 
27                         for (int l = 0; l < r; l++) {
28                                 numComp++;
29                                 vector<string> groups; groups.push_back(globaldata->Groups[r]); groups.push_back(globaldata->Groups[l]);
30                                 namesOfGroupCombos.push_back(groups);
31                         }
32                 }
33                 
34                 if (numComp != 1) {
35                         vector<string> groups;
36                         if (numGroups == 0) {
37                                 //get score for all users groups
38                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
39                                         if (tmap->namesOfGroups[i] != "xxx") {
40                                                 groups.push_back(tmap->namesOfGroups[i]);
41                                         }
42                                 }
43                                 namesOfGroupCombos.push_back(groups);
44                         }else {
45                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
46                                         groups.push_back(globaldata->Groups[i]);
47                                 }
48                                 namesOfGroupCombos.push_back(groups);
49                         }
50                 }
51
52                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
53                         if(processors == 1){
54                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
55                         }else{
56                                 int numPairs = namesOfGroupCombos.size();
57                                 
58                                 int numPairsPerProcessor = numPairs / processors;
59                                 
60                                 for (int i = 0; i < processors; i++) {
61                                         int startPos = i * numPairsPerProcessor;
62                         
63                                         if(i == processors - 1){
64                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
65                                         }
66                 
67                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
68                                 }
69                                 data = createProcesses(t, namesOfGroupCombos);
70                                 lines.clear();
71                         }
72                 #else
73                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
74                 #endif
75                 
76                 return data;
77         }
78         catch(exception& e) {
79                 m->errorOut(e, "Unweighted", "getValues");
80                 exit(1);
81         }
82 }
83 /**************************************************************************************************/
84
85 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos) {
86         try {
87 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
88                 int process = 1;
89                 vector<int> processIDS;
90                 
91                 EstOutput results;
92                 
93                 //loop through and create all the processes you want
94                 while (process != processors) {
95                         int pid = fork();
96                         
97                         if (pid > 0) {
98                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
99                                 process++;
100                         }else if (pid == 0){
101                                 EstOutput myresults;
102                                 myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num);
103                                 
104                                 if (m->control_pressed) { exit(0); }
105                                 
106                                 m->mothurOut("Merging results."); m->mothurOutEndLine();
107                                 
108                                 //pass numSeqs to parent
109                                 ofstream out;
110                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
111                                 m->openOutputFile(tempFile, out);
112                                 out << myresults.size() << endl;
113                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
114                                 out.close();
115                                 
116                                 exit(0);
117                         }else { 
118                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
119                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
120                                 exit(0); 
121                         }
122                 }
123                 
124                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num);
125                 
126                 //force parent to wait until all the processes are done
127                 for (int i=0;i<(processors-1);i++) { 
128                         int temp = processIDS[i];
129                         wait(&temp);
130                 }
131                 
132                 if (m->control_pressed) { return results; }
133                 
134                 //get data created by processes
135                 for (int i=0;i<(processors-1);i++) { 
136                         ifstream in;
137                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
138                         m->openInputFile(s, in);
139                         
140                         //get quantiles
141                         if (!in.eof()) {
142                                 int num;
143                                 in >> num; m->gobble(in);
144                                 
145                                 if (m->control_pressed) { break; }
146                                 
147                                 double w; 
148                                 for (int j = 0; j < num; j++) {
149                                         in >> w;
150                                         results.push_back(w);
151                                 }
152                                 m->gobble(in);
153                         }
154                         in.close();
155                         remove(s.c_str());
156                 }
157                 
158                 m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine();
159                 
160                 return results;
161 #endif          
162         }
163         catch(exception& e) {
164                 m->errorOut(e, "Unweighted", "createProcesses");
165                 exit(1);
166         }
167 }
168 /**************************************************************************************************/
169 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num) { 
170  try {
171                 
172                 EstOutput results; results.resize(num);
173                 
174                 int count = 0;
175                 int numLeaves = t->getNumLeaves();
176                 int total = num;
177                 int twentyPercent = (total * 0.20);
178                 if (twentyPercent == 0) { twentyPercent = 1; }
179                 
180                 
181                 for (int h = start; h < (start+num); h++) {
182         //cout << namesOfGroupCombos[h][0] << '\t' << namesOfGroupCombos[h][1] << endl;         
183                         if (m->control_pressed) { return results; }
184                 
185                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
186                         double totalBL = 0.00;  //all branch lengths
187                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
188                         map<int, double> tempTotals; //maps node to total Branch Length
189                         map<int, int> nodePcountSize; //maps node to pcountSize
190                         map<int, int>::iterator itCount;
191                                 
192                         for(int i=0;i<t->getNumNodes();i++){
193                         
194                                 if (m->control_pressed) {  return data; }
195                                 
196                                 //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
197                                 //pcountSize = 2, not unique to one group
198                                 //pcountSize = 1, unique to one group
199                                 
200                                 int pcountSize = 0;
201                                 for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
202                                         map<string, int>::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]);
203                                         if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
204                                 }
205
206                                 nodePcountSize[i] = pcountSize;
207                                 
208                                 //cout << i << '\t' << t->tree[i].getName() << " br = " << abs(t->tree[i].getBranchLength()) << '\t';           
209                                 if (pcountSize == 0) { }
210                                 else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(t->tree[i].getBranchLength()); }
211                                 
212                                 //if you are a leaf from a users group add to total
213                                 if (i < numLeaves) {
214                                         if ((t->tree[i].getBranchLength() != -1) && pcountSize != 0) { 
215                                         //cout << "added to total" << endl; 
216                                                 totalBL += abs(t->tree[i].getBranchLength()); 
217                                         }
218                                         tempTotals[i] = 0.0;  //we don't care about you, or we have already added you
219                                 }else{ //if you are not a leaf 
220                                         //do both your chidren have have descendants from the users groups? 
221                                         int lc = t->tree[i].getLChild();
222                                         int rc = t->tree[i].getRChild();
223                                         
224                                         //if yes, add your childrens tempTotals
225                                         if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) {
226                                                 totalBL += tempTotals[lc] + tempTotals[rc]; 
227                                                 //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl;
228                                                 if (t->tree[i].getBranchLength() != -1) {
229                                                         tempTotals[i] = abs(t->tree[i].getBranchLength());
230                                                 }else {
231                                                         tempTotals[i] = 0.0;
232                                                 }
233                                         }else if ((nodePcountSize[lc] == 0) && (nodePcountSize[rc] == 0)) { tempTotals[i] = 0.0;  //we don't care about you
234                                         }else { //if no, your tempTotal is your childrens temp totals + your branch length
235                                                 if (t->tree[i].getBranchLength() != -1) {
236                                                         tempTotals[i] = tempTotals[lc] + tempTotals[rc] + abs(t->tree[i].getBranchLength()); 
237                                                 }else {
238                                                         tempTotals[i] = tempTotals[lc] + tempTotals[rc];
239                                                 }
240                                         }
241                                         //cout << "temptotal = "<< tempTotals[i] << endl;
242                                 }
243
244                         }
245         //cout << UniqueBL << '\t' << totalBL << endl;          
246                         UW = (UniqueBL / totalBL);  
247         
248                         if (isnan(UW) || isinf(UW)) { UW = 0; }
249         
250                         results[count] = UW;
251                         count++;
252
253                         //report progress
254                         if((count % twentyPercent) == 0) {      float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine();       }
255                 }
256                 
257                 //report progress
258                 if((count % twentyPercent) != 0) {      float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine();       }
259                 
260                 return results; 
261         }
262         catch(exception& e) {
263                 m->errorOut(e, "Unweighted", "driver");
264                 exit(1);
265         }
266 }
267 /**************************************************************************************************/
268
269 EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, string o) { 
270  try {
271                 globaldata = GlobalData::getInstance();
272                 processors = p;
273                 outputDir = o;
274                 
275                 
276                 //if the users enters no groups then give them the score of all groups
277                 int numGroups = globaldata->Groups.size();
278                 
279                 //calculate number of comparsions
280                 int numComp = 0;
281                 vector< vector<string> > namesOfGroupCombos;
282                 for (int r=0; r<numGroups; r++) { 
283                         for (int l = 0; l < r; l++) {
284                                 numComp++;
285                                 vector<string> groups; groups.push_back(globaldata->Groups[r]); groups.push_back(globaldata->Groups[l]);
286                                 namesOfGroupCombos.push_back(groups);
287                         }
288                 }
289                 
290                 if (numComp != 1) {
291                         vector<string> groups;
292                         if (numGroups == 0) {
293                                 //get score for all users groups
294                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
295                                         if (tmap->namesOfGroups[i] != "xxx") {
296                                                 groups.push_back(tmap->namesOfGroups[i]);
297                                         }
298                                 }
299                                 namesOfGroupCombos.push_back(groups);
300                         }else {
301                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
302                                         groups.push_back(globaldata->Groups[i]);
303                                 }
304                                 namesOfGroupCombos.push_back(groups);
305                         }
306                 }
307
308                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
309                         if(processors == 1){
310                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true);
311                         }else{
312                                 int numPairs = namesOfGroupCombos.size();
313                                 
314                                 int numPairsPerProcessor = numPairs / processors;
315                                 
316                                 for (int i = 0; i < processors; i++) {
317                                         int startPos = i * numPairsPerProcessor;
318                                         if(i == processors - 1){
319                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
320                                         }
321                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
322                                 }
323                                         
324                                 data = createProcesses(t, namesOfGroupCombos, true);
325                                 
326                                 lines.clear();
327                         }
328                 #else
329                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true);
330                 #endif
331         
332                 return data;
333         }
334         catch(exception& e) {
335                 m->errorOut(e, "Unweighted", "getValues");
336                 exit(1);
337         }
338 }
339 /**************************************************************************************************/
340
341 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, bool usingGroups) {
342         try {
343 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
344                 int process = 1;
345                 vector<int> processIDS;
346                 
347                 EstOutput results;
348                 
349                 //loop through and create all the processes you want
350                 while (process != processors) {
351                         int pid = fork();
352                         
353                         if (pid > 0) {
354                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
355                                 process++;
356                         }else if (pid == 0){
357                                 EstOutput myresults;
358                                 myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, usingGroups);
359                                 
360                                 if (m->control_pressed) { exit(0); }
361                                 
362                                 //pass numSeqs to parent
363                                 ofstream out;
364                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
365                                 m->openOutputFile(tempFile, out);
366                                 out << myresults.size() << endl;
367                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
368                                 out.close();
369                                 
370                                 exit(0);
371                         }else { 
372                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
373                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
374                                 exit(0); 
375                         }
376                 }
377                 
378                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups);
379                 
380                 //force parent to wait until all the processes are done
381                 for (int i=0;i<(processors-1);i++) { 
382                         int temp = processIDS[i];
383                         wait(&temp);
384                 }
385                 
386                 if (m->control_pressed) { return results; }
387                 
388                 //get data created by processes
389                 for (int i=0;i<(processors-1);i++) { 
390                         ifstream in;
391                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
392                         m->openInputFile(s, in);
393                         
394                         //get quantiles
395                         if (!in.eof()) {
396                                 int num;
397                                 in >> num; m->gobble(in);
398                                         
399                                 if (m->control_pressed) { break; }
400                                 
401                                 double w; 
402                                 for (int j = 0; j < num; j++) {
403                                         in >> w;
404                                         
405                                         results.push_back(w);
406                                 }
407                                 m->gobble(in);
408                         }
409                         in.close();
410                         remove(s.c_str());
411                 }
412                 
413                 return results;
414 #endif          
415         }
416         catch(exception& e) {
417                 m->errorOut(e, "Unweighted", "createProcesses");
418                 exit(1);
419         }
420 }
421 /**************************************************************************************************/
422 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, bool usingGroups) { 
423  try {
424                 
425                 EstOutput results; results.resize(num);
426                 
427                 int count = 0;
428                 int numLeaves = t->getNumLeaves();
429                 
430                 Tree* copyTree = new Tree;
431                 
432                 for (int h = start; h < (start+num); h++) {
433                 
434                         if (m->control_pressed) { return results; }
435                 
436                         //copy random tree passed in
437                         copyTree->getCopy(t);
438                                 
439                         //swap labels in the groups you want to compare
440                         copyTree->assembleRandomUnifracTree(namesOfGroupCombos[h]);
441                         
442                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
443                         double totalBL = 0.00;  //all branch lengths
444                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
445                         map<int, double> tempTotals; //maps node to total Branch Length
446                         map<int, int> nodePcountSize; //maps node to pcountSize
447                                 
448                         for(int i=0;i<copyTree->getNumNodes();i++){
449                         
450                                 if (m->control_pressed) {  return data; }
451                                 
452                                 //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
453                                 //pcountSize = 2, not unique to one group
454                                 //pcountSize = 1, unique to one group
455                                 
456                                 int pcountSize = 0;
457                                 for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
458                                         map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(namesOfGroupCombos[h][j]);
459                                         if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
460                                 }
461                                 
462                                 nodePcountSize[i] = pcountSize;
463                         
464                                 if (pcountSize == 0) { }
465                                 else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(copyTree->tree[i].getBranchLength());      }
466                                 
467                                 //if you are a leaf from a users group add to total
468                                 if (i < numLeaves) {
469                                         if ((copyTree->tree[i].getBranchLength() != -1) && pcountSize != 0) { 
470                                                 totalBL += abs(copyTree->tree[i].getBranchLength()); 
471                                         }
472                                         tempTotals[i] = 0.0;  //we don't care about you, or we have already added you
473                                 }else{ //if you are not a leaf 
474                                         //do both your chidren have have descendants from the users groups? 
475                                         int lc = copyTree->tree[i].getLChild();
476                                         int rc = copyTree->tree[i].getRChild();
477                                         
478                                         //if yes, add your childrens tempTotals
479                                         if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) {
480                                                 totalBL += tempTotals[lc] + tempTotals[rc]; 
481                                                 
482                                                 if (copyTree->tree[i].getBranchLength() != -1) {
483                                                         tempTotals[i] = abs(copyTree->tree[i].getBranchLength());
484                                                 }else {
485                                                         tempTotals[i] = 0.0;
486                                                 }
487                                         }else if ((nodePcountSize[lc] == 0) && (nodePcountSize[rc] == 0)) { tempTotals[i] = 0.0;  //we don't care about you
488                                         }else { //if no, your tempTotal is your childrens temp totals + your branch length
489                                                 if (t->tree[i].getBranchLength() != -1) {
490                                                         tempTotals[i] = tempTotals[lc] + tempTotals[rc] + abs(copyTree->tree[i].getBranchLength()); 
491                                                 }else {
492                                                         tempTotals[i] = tempTotals[lc] + tempTotals[rc];
493                                                 }
494                                         }
495                                         
496                                 }
497
498                         }
499                 
500                         UW = (UniqueBL / totalBL);  
501         
502                         if (isnan(UW) || isinf(UW)) { UW = 0; }
503         
504                         results[count] = UW;
505                         count++;
506
507                 }
508                 
509                 delete copyTree;
510                 
511                 return results; 
512         }
513         catch(exception& e) {
514                 m->errorOut(e, "Unweighted", "driver");
515                 exit(1);
516         }
517 }
518 /**************************************************************************************************/
519
520