]> git.donarmstrong.com Git - mothur.git/blob - unweighted.cpp
fixed unifrac bug with multiple processors if numComps was less than processors....
[mothur.git] / unweighted.cpp
1 /*
2  *  unweighted.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unweighted.h"
11
12 /**************************************************************************************************/
13
14 EstOutput Unweighted::getValues(Tree* t, int p, string o) {
15         try {
16                 globaldata = GlobalData::getInstance();
17                 processors = p;
18                 outputDir = o;
19                         
20                 //if the users enters no groups then give them the score of all groups
21                 int numGroups = globaldata->Groups.size();
22                 
23                 //calculate number of comparsions
24                 int numComp = 0;
25                 vector< vector<string> > namesOfGroupCombos;
26                 for (int r=0; r<numGroups; r++) { 
27                         for (int l = 0; l < r; l++) {
28                                 numComp++;
29                                 vector<string> groups; groups.push_back(globaldata->Groups[r]); groups.push_back(globaldata->Groups[l]);
30                                 namesOfGroupCombos.push_back(groups);
31                         }
32                 }
33                 
34                 if (numComp != 1) {
35                         vector<string> groups;
36                         if (numGroups == 0) {
37                                 //get score for all users groups
38                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
39                                         if (tmap->namesOfGroups[i] != "xxx") {
40                                                 groups.push_back(tmap->namesOfGroups[i]);
41                                         }
42                                 }
43                                 namesOfGroupCombos.push_back(groups);
44                         }else {
45                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
46                                         groups.push_back(globaldata->Groups[i]);
47                                 }
48                                 namesOfGroupCombos.push_back(groups);
49                         }
50                 }
51
52                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
53                         if(processors == 1){
54                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
55                         }else{
56                                 int numPairs = namesOfGroupCombos.size();
57                                 
58                                 int numPairsPerProcessor = numPairs / processors;
59                                 
60                                 for (int i = 0; i < processors; i++) {
61                                         int startPos = i * numPairsPerProcessor;
62                         
63                                         if(i == processors - 1){
64                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
65                                         }
66                 
67                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
68                                 }
69                                 data = createProcesses(t, namesOfGroupCombos);
70                                 lines.clear();
71                         }
72                 #else
73                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
74                 #endif
75                 
76                 return data;
77         }
78         catch(exception& e) {
79                 m->errorOut(e, "Unweighted", "getValues");
80                 exit(1);
81         }
82 }
83 /**************************************************************************************************/
84
85 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos) {
86         try {
87 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
88                 int process = 1;
89                 vector<int> processIDS;
90                 
91                 EstOutput results;
92                 
93                 //loop through and create all the processes you want
94                 while (process != processors) {
95                         int pid = fork();
96                         
97                         if (pid > 0) {
98                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
99                                 process++;
100                         }else if (pid == 0){
101                                 EstOutput myresults;
102                                 myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num);
103                                 
104                                 if (m->control_pressed) { exit(0); }
105                                 
106                                 m->mothurOut("Merging results."); m->mothurOutEndLine();
107                                 
108                                 //pass numSeqs to parent
109                                 ofstream out;
110                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
111                                 m->openOutputFile(tempFile, out);
112                                 out << myresults.size() << endl;
113                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
114                                 out.close();
115                                 
116                                 exit(0);
117                         }else { 
118                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
119                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
120                                 exit(0); 
121                         }
122                 }
123                 
124                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num);
125                 
126                 //force parent to wait until all the processes are done
127                 for (int i=0;i<(processors-1);i++) { 
128                         int temp = processIDS[i];
129                         wait(&temp);
130                 }
131                 
132                 if (m->control_pressed) { return results; }
133                 
134                 //get data created by processes
135                 for (int i=0;i<(processors-1);i++) { 
136                         ifstream in;
137                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
138                         m->openInputFile(s, in);
139                         
140                         //get quantiles
141                         if (!in.eof()) {
142                                 int num;
143                                 in >> num; m->gobble(in);
144                                 
145                                 if (m->control_pressed) { break; }
146                                 
147                                 double w; 
148                                 for (int j = 0; j < num; j++) {
149                                         in >> w;
150                                         results.push_back(w);
151                                 }
152                                 m->gobble(in);
153                         }
154                         in.close();
155                         remove(s.c_str());
156                 }
157                 
158                 m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine();
159                 
160                 return results;
161 #endif          
162         }
163         catch(exception& e) {
164                 m->errorOut(e, "Unweighted", "createProcesses");
165                 exit(1);
166         }
167 }
168 /**************************************************************************************************/
169 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num) { 
170  try {
171                 
172                 EstOutput results; results.resize(num);
173                 
174                 int count = 0;
175                 int numLeaves = t->getNumLeaves();
176                 int total = num;
177                 int twentyPercent = (total * 0.20);
178                 if (twentyPercent == 0) { twentyPercent = 1; }
179                 
180                 
181                 for (int h = start; h < (start+num); h++) {
182         //cout << namesOfGroupCombos[h][0] << '\t' << namesOfGroupCombos[h][1] << endl;         
183                         if (m->control_pressed) { return results; }
184                 
185                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
186                         double totalBL = 0.00;  //all branch lengths
187                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
188                         map<int, double> tempTotals; //maps node to total Branch Length
189                         map<int, int> nodePcountSize; //maps node to pcountSize
190                         map<int, int>::iterator itCount;
191                                 
192                         for(int i=0;i<t->getNumNodes();i++){
193                         
194                                 if (m->control_pressed) {  return data; }
195                                 
196                                 //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
197                                 //pcountSize = 2, not unique to one group
198                                 //pcountSize = 1, unique to one group
199                                 
200                                 int pcountSize = 0;
201                                 for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
202                                         map<string, int>::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]);
203                                         if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
204                                 }
205
206                                 nodePcountSize[i] = pcountSize;
207                                 
208                                 //cout << i << '\t' << t->tree[i].getName() << " br = " << abs(t->tree[i].getBranchLength()) << '\t';           
209                                 if (pcountSize == 0) { }
210                                 else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(t->tree[i].getBranchLength()); }
211                                 
212                                 //if you are a leaf from a users group add to total
213                                 if (i < numLeaves) {
214                                         if ((t->tree[i].getBranchLength() != -1) && pcountSize != 0) { 
215                                         //cout << "added to total" << endl; 
216                                                 totalBL += abs(t->tree[i].getBranchLength()); 
217                                         }
218                                         tempTotals[i] = 0.0;  //we don't care about you, or we have already added you
219                                 }else{ //if you are not a leaf 
220                                         //do both your chidren have have descendants from the users groups? 
221                                         int lc = t->tree[i].getLChild();
222                                         int rc = t->tree[i].getRChild();
223                                         
224                                         //if yes, add your childrens tempTotals
225                                         if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) {
226                                                 totalBL += tempTotals[lc] + tempTotals[rc]; 
227                                                 //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl;
228                                                 if (t->tree[i].getBranchLength() != -1) {
229                                                         tempTotals[i] = abs(t->tree[i].getBranchLength());
230                                                 }else {
231                                                         tempTotals[i] = 0.0;
232                                                 }
233                                         }else if ((nodePcountSize[lc] == 0) && (nodePcountSize[rc] == 0)) { tempTotals[i] = 0.0;  //we don't care about you
234                                         }else { //if no, your tempTotal is your childrens temp totals + your branch length
235                                                 tempTotals[i] = tempTotals[lc] + tempTotals[rc] + abs(t->tree[i].getBranchLength()); 
236                                         }
237                                         //cout << "temptotal = "<< tempTotals[i] << endl;
238                                 }
239
240                         }
241         //cout << UniqueBL << '\t' << totalBL << endl;          
242                         UW = (UniqueBL / totalBL);  
243         
244                         if (isnan(UW) || isinf(UW)) { UW = 0; }
245         
246                         results[count] = UW;
247                         count++;
248
249                         //report progress
250                         if((count % twentyPercent) == 0) {      float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine();       }
251                 }
252                 
253                 //report progress
254                 if((count % twentyPercent) != 0) {      float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine();       }
255                 
256                 return results; 
257         }
258         catch(exception& e) {
259                 m->errorOut(e, "Unweighted", "driver");
260                 exit(1);
261         }
262 }
263 /**************************************************************************************************/
264
265 EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, string o) { 
266  try {
267                 globaldata = GlobalData::getInstance();
268                 processors = p;
269                 outputDir = o;
270                 
271                 
272                 //if the users enters no groups then give them the score of all groups
273                 int numGroups = globaldata->Groups.size();
274                 
275                 //calculate number of comparsions
276                 int numComp = 0;
277                 vector< vector<string> > namesOfGroupCombos;
278                 for (int r=0; r<numGroups; r++) { 
279                         for (int l = 0; l < r; l++) {
280                                 numComp++;
281                                 vector<string> groups; groups.push_back(globaldata->Groups[r]); groups.push_back(globaldata->Groups[l]);
282                                 namesOfGroupCombos.push_back(groups);
283                         }
284                 }
285                 
286                 if (numComp != 1) {
287                         vector<string> groups;
288                         if (numGroups == 0) {
289                                 //get score for all users groups
290                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
291                                         if (tmap->namesOfGroups[i] != "xxx") {
292                                                 groups.push_back(tmap->namesOfGroups[i]);
293                                         }
294                                 }
295                                 namesOfGroupCombos.push_back(groups);
296                         }else {
297                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
298                                         groups.push_back(globaldata->Groups[i]);
299                                 }
300                                 namesOfGroupCombos.push_back(groups);
301                         }
302                 }
303
304                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
305                         if(processors == 1){
306                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true);
307                         }else{
308                                 int numPairs = namesOfGroupCombos.size();
309                                 
310                                 int numPairsPerProcessor = numPairs / processors;
311                                 
312                                 for (int i = 0; i < processors; i++) {
313                                         int startPos = i * numPairsPerProcessor;
314                                         if(i == processors - 1){
315                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
316                                         }
317                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
318                                 }
319                                         
320                                 data = createProcesses(t, namesOfGroupCombos, true);
321                                 
322                                 lines.clear();
323                         }
324                 #else
325                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true);
326                 #endif
327         
328                 return data;
329         }
330         catch(exception& e) {
331                 m->errorOut(e, "Unweighted", "getValues");
332                 exit(1);
333         }
334 }
335 /**************************************************************************************************/
336
337 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, bool usingGroups) {
338         try {
339 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
340                 int process = 1;
341                 vector<int> processIDS;
342                 
343                 EstOutput results;
344                 
345                 //loop through and create all the processes you want
346                 while (process != processors) {
347                         int pid = fork();
348                         
349                         if (pid > 0) {
350                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
351                                 process++;
352                         }else if (pid == 0){
353                                 EstOutput myresults;
354                                 myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, usingGroups);
355                                 
356                                 if (m->control_pressed) { exit(0); }
357                                 
358                                 //pass numSeqs to parent
359                                 ofstream out;
360                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
361                                 m->openOutputFile(tempFile, out);
362                                 out << myresults.size() << endl;
363                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
364                                 out.close();
365                                 
366                                 exit(0);
367                         }else { 
368                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
369                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
370                                 exit(0); 
371                         }
372                 }
373                 
374                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups);
375                 
376                 //force parent to wait until all the processes are done
377                 for (int i=0;i<(processors-1);i++) { 
378                         int temp = processIDS[i];
379                         wait(&temp);
380                 }
381                 
382                 if (m->control_pressed) { return results; }
383                 
384                 //get data created by processes
385                 for (int i=0;i<(processors-1);i++) { 
386                         ifstream in;
387                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
388                         m->openInputFile(s, in);
389                         
390                         //get quantiles
391                         if (!in.eof()) {
392                                 int num;
393                                 in >> num; m->gobble(in);
394                                         
395                                 if (m->control_pressed) { break; }
396                                 
397                                 double w; 
398                                 for (int j = 0; j < num; j++) {
399                                         in >> w;
400                                         
401                                         results.push_back(w);
402                                 }
403                                 m->gobble(in);
404                         }
405                         in.close();
406                         remove(s.c_str());
407                 }
408                 
409                 return results;
410 #endif          
411         }
412         catch(exception& e) {
413                 m->errorOut(e, "Unweighted", "createProcesses");
414                 exit(1);
415         }
416 }
417 /**************************************************************************************************/
418 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, bool usingGroups) { 
419  try {
420                 
421                 EstOutput results; results.resize(num);
422                 
423                 int count = 0;
424                 int numLeaves = t->getNumLeaves();
425                 
426                 Tree* copyTree = new Tree;
427                 
428                 for (int h = start; h < (start+num); h++) {
429                 
430                         if (m->control_pressed) { return results; }
431                 
432                         //copy random tree passed in
433                         copyTree->getCopy(t);
434                                 
435                         //swap labels in the groups you want to compare
436                         copyTree->assembleRandomUnifracTree(namesOfGroupCombos[h]);
437                         
438                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
439                         double totalBL = 0.00;  //all branch lengths
440                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
441                         map<int, double> tempTotals; //maps node to total Branch Length
442                         map<int, int> nodePcountSize; //maps node to pcountSize
443                                 
444                         for(int i=0;i<copyTree->getNumNodes();i++){
445                         
446                                 if (m->control_pressed) {  return data; }
447                                 
448                                 //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
449                                 //pcountSize = 2, not unique to one group
450                                 //pcountSize = 1, unique to one group
451                                 
452                                 int pcountSize = 0;
453                                 for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
454                                         map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(namesOfGroupCombos[h][j]);
455                                         if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
456                                 }
457                                 
458                                 nodePcountSize[i] = pcountSize;
459                         
460                                 if (pcountSize == 0) { }
461                                 else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(copyTree->tree[i].getBranchLength());      }
462                                 
463                                 //if you are a leaf from a users group add to total
464                                 if (i < numLeaves) {
465                                         if ((copyTree->tree[i].getBranchLength() != -1) && pcountSize != 0) { 
466                                                 totalBL += abs(copyTree->tree[i].getBranchLength()); 
467                                         }
468                                         tempTotals[i] = 0.0;  //we don't care about you, or we have already added you
469                                 }else{ //if you are not a leaf 
470                                         //do both your chidren have have descendants from the users groups? 
471                                         int lc = copyTree->tree[i].getLChild();
472                                         int rc = copyTree->tree[i].getRChild();
473                                         
474                                         //if yes, add your childrens tempTotals
475                                         if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) {
476                                                 totalBL += tempTotals[lc] + tempTotals[rc]; 
477                                                 
478                                                 if (copyTree->tree[i].getBranchLength() != -1) {
479                                                         tempTotals[i] = abs(copyTree->tree[i].getBranchLength());
480                                                 }else {
481                                                         tempTotals[i] = 0.0;
482                                                 }
483                                         }else if ((nodePcountSize[lc] == 0) && (nodePcountSize[rc] == 0)) { tempTotals[i] = 0.0;  //we don't care about you
484                                         }else { //if no, your tempTotal is your childrens temp totals + your branch length
485                                                 tempTotals[i] = tempTotals[lc] + tempTotals[rc] + abs(copyTree->tree[i].getBranchLength()); 
486                                         }
487                                         
488                                 }
489
490                         }
491                 
492                         UW = (UniqueBL / totalBL);  
493         
494                         if (isnan(UW) || isinf(UW)) { UW = 0; }
495         
496                         results[count] = UW;
497                         count++;
498
499                 }
500                 
501                 delete copyTree;
502                 
503                 return results; 
504         }
505         catch(exception& e) {
506                 m->errorOut(e, "Unweighted", "driver");
507                 exit(1);
508         }
509 }
510 /**************************************************************************************************/
511
512