]> git.donarmstrong.com Git - mothur.git/blob - unweighted.cpp
fixed unifrac.unweighted
[mothur.git] / unweighted.cpp
1 /*
2  *  unweighted.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unweighted.h"
11
12 /**************************************************************************************************/
13
14 EstOutput Unweighted::getValues(Tree* t, int p, string o) {
15         try {
16                 globaldata = GlobalData::getInstance();
17                 processors = p;
18                 outputDir = o;
19                         
20                 //if the users enters no groups then give them the score of all groups
21                 int numGroups = globaldata->Groups.size();
22                 
23                 //calculate number of comparsions
24                 int numComp = 0;
25                 vector< vector<string> > namesOfGroupCombos;
26                 for (int r=0; r<numGroups; r++) { 
27                         for (int l = 0; l < r; l++) {
28                                 numComp++;
29                                 vector<string> groups; groups.push_back(globaldata->Groups[r]); groups.push_back(globaldata->Groups[l]);
30                                 namesOfGroupCombos.push_back(groups);
31                         }
32                 }
33                 
34                 if (numComp != 1) {
35                         vector<string> groups;
36                         if (numGroups == 0) {
37                                 //get score for all users groups
38                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
39                                         if (tmap->namesOfGroups[i] != "xxx") {
40                                                 groups.push_back(tmap->namesOfGroups[i]);
41                                         }
42                                 }
43                                 namesOfGroupCombos.push_back(groups);
44                         }else {
45                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
46                                         groups.push_back(globaldata->Groups[i]);
47                                 }
48                                 namesOfGroupCombos.push_back(groups);
49                         }
50                 }
51
52                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
53                         if(processors == 1){
54                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
55                         }else{
56                                 int numPairs = namesOfGroupCombos.size();
57                                 
58                                 int numPairsPerProcessor = numPairs / processors;
59                                 
60                                 for (int i = 0; i < processors; i++) {
61                                         int startPos = i * numPairsPerProcessor;
62                         
63                                         if(i == processors - 1){
64                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
65                                         }
66                 
67                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
68                                 }
69
70                                 data = createProcesses(t, namesOfGroupCombos);
71                                 lines.clear();
72                         }
73                 #else
74                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
75                 #endif
76                 
77                 return data;
78         }
79         catch(exception& e) {
80                 m->errorOut(e, "Unweighted", "getValues");
81                 exit(1);
82         }
83 }
84 /**************************************************************************************************/
85
86 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos) {
87         try {
88 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
89                 int process = 1;
90                 int num = 0;
91                 vector<int> processIDS;
92                 
93                 EstOutput results;
94                 
95                 //loop through and create all the processes you want
96                 while (process != processors) {
97                         int pid = fork();
98                         
99                         if (pid > 0) {
100                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
101                                 process++;
102                         }else if (pid == 0){
103                                 EstOutput myresults;
104                                 myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num);
105                                 
106                                 if (m->control_pressed) { exit(0); }
107                                 
108                                 m->mothurOut("Merging results."); m->mothurOutEndLine();
109                                 
110                                 //pass numSeqs to parent
111                                 ofstream out;
112                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
113                                 m->openOutputFile(tempFile, out);
114                                 out << myresults.size() << endl;
115                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
116                                 out.close();
117                                 
118                                 exit(0);
119                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
120                 }
121                 
122                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num);
123                 
124                 //force parent to wait until all the processes are done
125                 for (int i=0;i<(processors-1);i++) { 
126                         int temp = processIDS[i];
127                         wait(&temp);
128                 }
129                 
130                 if (m->control_pressed) { return results; }
131                 
132                 //get data created by processes
133                 for (int i=0;i<(processors-1);i++) { 
134                         ifstream in;
135                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
136                         m->openInputFile(s, in);
137                         
138                         //get quantiles
139                         if (!in.eof()) {
140                                 int num;
141                                 in >> num; m->gobble(in);
142                                 
143                                 if (m->control_pressed) { break; }
144                                 
145                                 double w; 
146                                 for (int j = 0; j < num; j++) {
147                                         in >> w;
148                                         results.push_back(w);
149                                 }
150                                 m->gobble(in);
151                         }
152                         in.close();
153                         remove(s.c_str());
154                 }
155                 
156                 m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine();
157                 
158                 return results;
159 #endif          
160         }
161         catch(exception& e) {
162                 m->errorOut(e, "Unweighted", "createProcesses");
163                 exit(1);
164         }
165 }
166 /**************************************************************************************************/
167 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num) { 
168  try {
169                 
170                 EstOutput results; results.resize(num);
171                 
172                 int count = 0;
173                 int numLeaves = t->getNumLeaves();
174                 int total = num;
175                 int twentyPercent = (total * 0.20);
176                 if (twentyPercent == 0) { twentyPercent = 1; }
177                 
178                 
179                 for (int h = start; h < (start+num); h++) {
180         //cout << namesOfGroupCombos[h][0] << '\t' << namesOfGroupCombos[h][1] << endl;         
181                         if (m->control_pressed) { return results; }
182                 
183                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
184                         double totalBL = 0.00;  //all branch lengths
185                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
186                         map<int, double> tempTotals; //maps node to total Branch Length
187                         map<int, int> nodePcountSize; //maps node to pcountSize
188                         map<int, int>::iterator itCount;
189                                 
190                         for(int i=0;i<t->getNumNodes();i++){
191                         
192                                 if (m->control_pressed) {  return data; }
193                                 
194                                 //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
195                                 //pcountSize = 2, not unique to one group
196                                 //pcountSize = 1, unique to one group
197                                 
198                                 int pcountSize = 0;
199                                 for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
200                                         map<string, int>::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]);
201                                         if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
202                                 }
203
204                                 nodePcountSize[i] = pcountSize;
205                                 
206                                 //cout << i << '\t' << t->tree[i].getName() << " br = " << abs(t->tree[i].getBranchLength()) << '\t';           
207                                 if (pcountSize == 0) { }
208                                 else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(t->tree[i].getBranchLength()); }
209                                 
210                                 //if you are a leaf from a users group add to total
211                                 if (i < numLeaves) {
212                                         if ((t->tree[i].getBranchLength() != -1) && pcountSize != 0) { 
213                                         //cout << "added to total" << endl; 
214                                                 totalBL += abs(t->tree[i].getBranchLength()); 
215                                         }
216                                         tempTotals[i] = 0.0;  //we don't care about you, or we have already added you
217                                 }else{ //if you are not a leaf 
218                                         //do both your chidren have have descendants from the users groups? 
219                                         int lc = t->tree[i].getLChild();
220                                         int rc = t->tree[i].getRChild();
221                                         
222                                         //if yes, add your childrens tempTotals
223                                         if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) {
224                                                 totalBL += tempTotals[lc] + tempTotals[rc]; 
225                                                 //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl;
226                                                 if (t->tree[i].getBranchLength() != -1) {
227                                                         tempTotals[i] = abs(t->tree[i].getBranchLength());
228                                                 }else {
229                                                         tempTotals[i] = 0.0;
230                                                 }
231                                         }else if ((nodePcountSize[lc] == 0) && (nodePcountSize[rc] == 0)) { tempTotals[i] = 0.0;  //we don't care about you
232                                         }else { //if no, your tempTotal is your childrens temp totals + your branch length
233                                                 tempTotals[i] = tempTotals[lc] + tempTotals[rc] + abs(t->tree[i].getBranchLength()); 
234                                         }
235                                         //cout << "temptotal = "<< tempTotals[i] << endl;
236                                 }
237
238                         }
239         //cout << UniqueBL << '\t' << totalBL << endl;          
240                         UW = (UniqueBL / totalBL);  
241         
242                         if (isnan(UW) || isinf(UW)) { UW = 0; }
243         
244                         results[count] = UW;
245                         count++;
246
247                         //report progress
248                         if((count % twentyPercent) == 0) {      float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine();       }
249                 }
250                 
251                 //report progress
252                 if((count % twentyPercent) != 0) {      float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine();       }
253                 
254                 return results; 
255         }
256         catch(exception& e) {
257                 m->errorOut(e, "Unweighted", "driver");
258                 exit(1);
259         }
260 }
261 /**************************************************************************************************/
262
263 EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, string o) { 
264  try {
265                 globaldata = GlobalData::getInstance();
266                 processors = p;
267                 outputDir = o;
268                 
269                 
270                 //if the users enters no groups then give them the score of all groups
271                 int numGroups = globaldata->Groups.size();
272                 
273                 //calculate number of comparsions
274                 int numComp = 0;
275                 vector< vector<string> > namesOfGroupCombos;
276                 for (int r=0; r<numGroups; r++) { 
277                         for (int l = 0; l < r; l++) {
278                                 numComp++;
279                                 vector<string> groups; groups.push_back(globaldata->Groups[r]); groups.push_back(globaldata->Groups[l]);
280                                 namesOfGroupCombos.push_back(groups);
281                         }
282                 }
283                 
284                 if (numComp != 1) {
285                         vector<string> groups;
286                         if (numGroups == 0) {
287                                 //get score for all users groups
288                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
289                                         if (tmap->namesOfGroups[i] != "xxx") {
290                                                 groups.push_back(tmap->namesOfGroups[i]);
291                                         }
292                                 }
293                                 namesOfGroupCombos.push_back(groups);
294                         }else {
295                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
296                                         groups.push_back(globaldata->Groups[i]);
297                                 }
298                                 namesOfGroupCombos.push_back(groups);
299                         }
300                 }
301
302                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
303                         if(processors == 1){
304                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true);
305                         }else{
306                                 int numPairs = namesOfGroupCombos.size();
307                                 
308                                 int numPairsPerProcessor = numPairs / processors;
309                                 
310                                 for (int i = 0; i < processors; i++) {
311                                         int startPos = i * numPairsPerProcessor;
312                                         if(i == processors - 1){
313                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
314                                         }
315                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
316                                 }
317
318                                 data = createProcesses(t, namesOfGroupCombos, true);
319                                 
320                                 lines.clear();
321                         }
322                 #else
323                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true);
324                 #endif
325         
326                 return data;
327         }
328         catch(exception& e) {
329                 m->errorOut(e, "Unweighted", "getValues");
330                 exit(1);
331         }
332 }
333 /**************************************************************************************************/
334
335 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, bool usingGroups) {
336         try {
337 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
338                 int process = 1;
339                 int num = 0;
340                 vector<int> processIDS;
341                 
342                 EstOutput results;
343                 
344                 //loop through and create all the processes you want
345                 while (process != processors) {
346                         int pid = fork();
347                         
348                         if (pid > 0) {
349                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
350                                 process++;
351                         }else if (pid == 0){
352                                 EstOutput myresults;
353                                 myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, usingGroups);
354                                 
355                                 if (m->control_pressed) { exit(0); }
356                                 
357                                 //pass numSeqs to parent
358                                 ofstream out;
359                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
360                                 m->openOutputFile(tempFile, out);
361                                 out << myresults.size() << endl;
362                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
363                                 out.close();
364                                 
365                                 exit(0);
366                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
367                 }
368                 
369                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups);
370                 
371                 //force parent to wait until all the processes are done
372                 for (int i=0;i<(processors-1);i++) { 
373                         int temp = processIDS[i];
374                         wait(&temp);
375                 }
376                 
377                 if (m->control_pressed) { return results; }
378                 
379                 //get data created by processes
380                 for (int i=0;i<(processors-1);i++) { 
381                         ifstream in;
382                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
383                         m->openInputFile(s, in);
384                         
385                         //get quantiles
386                         if (!in.eof()) {
387                                 int num;
388                                 in >> num; m->gobble(in);
389                                 
390                                 if (m->control_pressed) { break; }
391                                 
392                                 double w; 
393                                 for (int j = 0; j < num; j++) {
394                                         in >> w;
395                                         results.push_back(w);
396                                 }
397                                 m->gobble(in);
398                         }
399                         in.close();
400                         remove(s.c_str());
401                 }
402                 
403                 return results;
404 #endif          
405         }
406         catch(exception& e) {
407                 m->errorOut(e, "Unweighted", "createProcesses");
408                 exit(1);
409         }
410 }
411 /**************************************************************************************************/
412 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, bool usingGroups) { 
413  try {
414                 
415                 EstOutput results; results.resize(num);
416                 
417                 int count = 0;
418                 int total = num;
419                 int twentyPercent = (total * 0.20);
420                 int numLeaves = t->getNumLeaves();
421                 
422                 Tree* copyTree = new Tree;
423                 
424                 for (int h = start; h < (start+num); h++) {
425                 
426                         if (m->control_pressed) { return results; }
427                 
428                         //copy random tree passed in
429                         copyTree->getCopy(t);
430                                 
431                         //swap labels in the groups you want to compare
432                         copyTree->assembleRandomUnifracTree(namesOfGroupCombos[h]);
433                         
434                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
435                         double totalBL = 0.00;  //all branch lengths
436                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
437                         map<int, double> tempTotals; //maps node to total Branch Length
438                         map<int, int> nodePcountSize; //maps node to pcountSize
439                                 
440                         for(int i=0;i<copyTree->getNumNodes();i++){
441                         
442                                 if (m->control_pressed) {  return data; }
443                                 
444                                 //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
445                                 //pcountSize = 2, not unique to one group
446                                 //pcountSize = 1, unique to one group
447                                 
448                                 int pcountSize = 0;
449                                 for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
450                                         map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(namesOfGroupCombos[h][j]);
451                                         if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
452                                 }
453                                 
454                                 nodePcountSize[i] = pcountSize;
455                         
456                                 if (pcountSize == 0) { }
457                                 else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(copyTree->tree[i].getBranchLength());      }
458                                 
459                                 //if you are a leaf from a users group add to total
460                                 if (i < numLeaves) {
461                                         if ((copyTree->tree[i].getBranchLength() != -1) && pcountSize != 0) { 
462                                                 totalBL += abs(copyTree->tree[i].getBranchLength()); 
463                                         }
464                                         tempTotals[i] = 0.0;  //we don't care about you, or we have already added you
465                                 }else{ //if you are not a leaf 
466                                         //do both your chidren have have descendants from the users groups? 
467                                         int lc = copyTree->tree[i].getLChild();
468                                         int rc = copyTree->tree[i].getRChild();
469                                         
470                                         //if yes, add your childrens tempTotals
471                                         if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) {
472                                                 totalBL += tempTotals[lc] + tempTotals[rc]; 
473                                                 
474                                                 if (copyTree->tree[i].getBranchLength() != -1) {
475                                                         tempTotals[i] = abs(copyTree->tree[i].getBranchLength());
476                                                 }else {
477                                                         tempTotals[i] = 0.0;
478                                                 }
479                                         }else if ((nodePcountSize[lc] == 0) && (nodePcountSize[rc] == 0)) { tempTotals[i] = 0.0;  //we don't care about you
480                                         }else { //if no, your tempTotal is your childrens temp totals + your branch length
481                                                 tempTotals[i] = tempTotals[lc] + tempTotals[rc] + abs(copyTree->tree[i].getBranchLength()); 
482                                         }
483                                         
484                                 }
485
486                         }
487                 
488                         UW = (UniqueBL / totalBL);  
489         
490                         if (isnan(UW) || isinf(UW)) { UW = 0; }
491         
492                         results[count] = UW;
493                         count++;
494
495                 }
496                 
497                 delete copyTree;
498                 
499                 return results; 
500         }
501         catch(exception& e) {
502                 m->errorOut(e, "Unweighted", "driver");
503                 exit(1);
504         }
505 }
506 /**************************************************************************************************/
507
508