]> git.donarmstrong.com Git - mothur.git/blob - weighted.cpp
working on unifrac parallelization
[mothur.git] / weighted.cpp
1 /*
2  *  weighted.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "weighted.h"
11
12 /**************************************************************************************************/
13
14 EstOutput Weighted::getValues(Tree* t, int p, string o) {
15     try {
16                 globaldata = GlobalData::getInstance();
17                 
18                 data.clear(); //clear out old values
19                 int numGroups;
20                 vector<double> D;
21                 processors = p;
22                 outputDir = o;
23                 
24                 numGroups = globaldata->Groups.size();
25         
26                 vector<double> sums = getBranchLengthSums(t);
27                 
28                 if (m->control_pressed) { return data; }
29                 
30                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
31                 vector< vector<string> > namesOfGroupCombos;
32                 for (int i=0; i<numGroups; i++) { 
33                         for (int l = i+1; l < numGroups; l++) { 
34                                 //initialize weighted scores
35                                 //WScore[globaldata->Groups[i]+globaldata->Groups[l]] = 0.0;
36                                 vector<string> groups; groups.push_back(globaldata->Groups[i]); groups.push_back(globaldata->Groups[l]);
37                                 namesOfGroupCombos.push_back(groups);
38                         }
39                 }
40                 
41                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
42                         if(processors == 1){
43                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), sums);
44                         }else{
45                                 int numPairs = namesOfGroupCombos.size();
46                                 
47                                 int numPairsPerProcessor = numPairs / processors;
48                                 
49                                 for (int i = 0; i < processors; i++) {
50                                         int startPos = i * numPairsPerProcessor;
51                                         if(i == processors - 1){
52                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
53                                         }
54                                         lines.push_back(new linePair(startPos, numPairsPerProcessor));
55                                 }
56
57                                 data = createProcesses(t, namesOfGroupCombos, sums);
58                                 
59                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
60                         }
61                 #else
62                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), sums);
63                 #endif
64                 
65                 return data;
66         }
67         catch(exception& e) {
68                 m->errorOut(e, "Weighted", "getValues");
69                 exit(1);
70         }
71 }
72 /**************************************************************************************************/
73
74 EstOutput Weighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector<double>& sums) {
75         try {
76 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
77                 int process = 1;
78                 int num = 0;
79                 vector<int> processIDS;
80                 
81                 EstOutput results;
82                 
83                 //loop through and create all the processes you want
84                 while (process != processors) {
85                         int pid = fork();
86                         
87                         if (pid > 0) {
88                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
89                                 process++;
90                         }else if (pid == 0){
91         
92                                 EstOutput Myresults;
93                                 Myresults = driver(t, namesOfGroupCombos, lines[process]->start, lines[process]->num, sums);
94                         
95                                 m->mothurOut("Merging results."); m->mothurOutEndLine();
96                                 
97                                 //pass numSeqs to parent
98                                 ofstream out;
99
100                                 string tempFile = outputDir + toString(getpid()) + ".weighted.results.temp";
101         
102                                 m->openOutputFile(tempFile, out);
103         
104                                 out << Myresults.size() << endl;
105                                 for (int i = 0; i < Myresults.size(); i++) {  out << Myresults[i] << '\t';  } out << endl;
106                                 out.close();
107                                 
108                                 exit(0);
109                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
110                 }
111         
112                 results = driver(t, namesOfGroupCombos, lines[0]->start, lines[0]->num, sums);
113         
114                 //force parent to wait until all the processes are done
115                 for (int i=0;i<(processors-1);i++) { 
116                         int temp = processIDS[i];
117                         wait(&temp);
118                 }
119         
120                 if (m->control_pressed) { return results; }
121                 
122                 //get data created by processes
123                 for (int i=0;i<(processors-1);i++) { 
124                         ifstream in;
125                         string s = outputDir + toString(processIDS[i]) + ".weighted.results.temp";
126                         m->openInputFile(s, in);
127                         
128                         //get quantiles
129                         while (!in.eof()) {
130                                 int num;
131                                 in >> num; m->gobble(in);
132                                 
133                                 if (m->control_pressed) { break; }
134
135                                 double w; 
136                                 for (int j = 0; j < num; j++) {
137                                         in >> w;
138                                         results.push_back(w);
139                                 }
140                                 m->gobble(in);
141                         }
142                         in.close();
143                         remove(s.c_str());
144                 }
145                 
146                 m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine();
147                 
148                 return results;
149 #endif          
150         }
151         catch(exception& e) {
152                 m->errorOut(e, "Weighted", "createProcesses");
153                 exit(1);
154         }
155 }
156 /**************************************************************************************************/
157 EstOutput Weighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector<double>& sums) { 
158  try {
159                 EstOutput results;
160                 vector<double> D;
161                 
162                 int count = 0;
163                 for (int h = start; h < (start+num); h++) {
164                 
165                         if (m->control_pressed) { return results; }
166                 
167                         //initialize weighted score
168                         string groupA = namesOfGroupCombos[h][0]; 
169                         string groupB = namesOfGroupCombos[h][1];
170                         
171                         WScore[groupA+groupB] = 0.0;
172                         D.push_back(0.0000); //initialize a spot in D for each combination
173                         
174                         //adding the wieghted sums from group i
175                         for (int j = 0; j < t->groupNodeInfo[groupA].size(); j++) { //the leaf nodes that have seqs from group i
176                                 map<string, int>::iterator it = t->tree[t->groupNodeInfo[groupA][j]].pcount.find(groupA);
177                                 int numSeqsInGroupI = it->second;
178                                 
179                                 double weightedSum = ((numSeqsInGroupI * sums[t->groupNodeInfo[groupA][j]]) / (double)tmap->seqsPerGroup[groupA]);
180                         
181                                 D[count] += weightedSum;
182                         }
183                         
184                         //adding the wieghted sums from group l
185                         for (int j = 0; j < t->groupNodeInfo[groupB].size(); j++) { //the leaf nodes that have seqs from group l
186                                 map<string, int>::iterator it = t->tree[t->groupNodeInfo[groupB][j]].pcount.find(groupB);
187                                 int numSeqsInGroupL = it->second;
188                                 
189                                 double weightedSum = ((numSeqsInGroupL * sums[t->groupNodeInfo[groupB][j]]) / (double)tmap->seqsPerGroup[groupB]);
190                         
191                                 D[count] += weightedSum;
192                         }
193                         count++;
194                 }
195                 
196                 //calculate u for the group comb 
197                 int total = t->getNumNodes();
198                 int twentyPercent = (total * 0.20);
199                 for(int i=0;i<t->getNumNodes();i++){
200                         for (int h = start; h < (start+num); h++) {
201                                 
202                                 string groupA = namesOfGroupCombos[h][0]; 
203                                 string groupB = namesOfGroupCombos[h][1];
204
205                                 if (m->control_pressed) { return results; }
206                                 
207                                 double u;
208                                 //does this node have descendants from groupA
209                                 it = t->tree[i].pcount.find(groupA);
210                                 //if it does u = # of its descendants with a certain group / total number in tree with a certain group
211                                 if (it != t->tree[i].pcount.end()) {
212                                         u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA];
213                                 }else { u = 0.00; }
214                         
215                                                         
216                                 //does this node have descendants from group l
217                                 it = t->tree[i].pcount.find(groupB);
218                                 //if it does subtract their percentage from u
219                                 if (it != t->tree[i].pcount.end()) {
220                                         u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB];
221                                 }
222                                                         
223                                 u = abs(u * t->tree[i].getBranchLength());
224                                                 
225                                 //save groupcombs u value
226                                 WScore[(groupA+groupB)] += u;
227                         }
228                         //report progress
229                         if((i) % twentyPercent == 0){   m->mothurOut("Percentage complete: " + toString(int((i / (float)total) * 100.0))); m->mothurOutEndLine();               }
230                 }
231                 
232                 m->mothurOut("Percentage complete: 100"); m->mothurOutEndLine();
233                 
234                 /********************************************************/
235                 //calculate weighted score for the group combination
236                 double UN;      
237                 count = 0;
238                 for (int h = start; h < (start+num); h++) {
239                         UN = (WScore[namesOfGroupCombos[h][0]+namesOfGroupCombos[h][1]] / D[count]);
240                 
241                         if (isnan(UN) || isinf(UN)) { UN = 0; } 
242                         results.push_back(UN);
243                         count++;
244                 }
245                                 
246                 return results; 
247         }
248         catch(exception& e) {
249                 m->errorOut(e, "Weighted", "driver");
250                 exit(1);
251         }
252 }
253 /**************************************************************************************************/
254 EstOutput Weighted::getValues(Tree* t, string groupA, string groupB, vector<double>& sums) { 
255  try {
256                 globaldata = GlobalData::getInstance();
257                 
258                 data.clear(); //clear out old values
259                 
260                 if (m->control_pressed) { return data; }
261                 
262                 //initialize weighted score
263                 WScore[(groupA+groupB)] = 0.0;
264                 double D = 0.0;
265                 
266                 vector<string> groups; groups.push_back(groupA); groups.push_back(groupB);
267                 
268                 //adding the wieghted sums from group i
269                 for (int j = 0; j < t->groupNodeInfo[groups[0]].size(); j++) { //the leaf nodes that have seqs from group i
270                         map<string, int>::iterator it = t->tree[t->groupNodeInfo[groups[0]][j]].pcount.find(groups[0]);
271                         int numSeqsInGroupI = it->second;
272                         
273                         double weightedSum = ((numSeqsInGroupI * sums[t->groupNodeInfo[groups[0]][j]]) / (double)tmap->seqsPerGroup[groups[0]]);
274                 
275                         D += weightedSum;
276                 }
277                 
278                 //adding the wieghted sums from group l
279                 for (int j = 0; j < t->groupNodeInfo[groups[1]].size(); j++) { //the leaf nodes that have seqs from group l
280                         map<string, int>::iterator it = t->tree[t->groupNodeInfo[groups[1]][j]].pcount.find(groups[1]);
281                         int numSeqsInGroupL = it->second;
282                         
283                         double weightedSum = ((numSeqsInGroupL * sums[t->groupNodeInfo[groups[1]][j]]) / (double)tmap->seqsPerGroup[groups[1]]);
284                 
285                         D += weightedSum;
286                 }
287                 
288                 //calculate u for the group comb 
289                 for(int i=0;i<t->getNumNodes();i++){
290                 
291                         if (m->control_pressed) { return data; }
292                         
293                         double u;
294                         //does this node have descendants from groupA
295                         it = t->tree[i].pcount.find(groupA);
296                         //if it does u = # of its descendants with a certain group / total number in tree with a certain group
297                         if (it != t->tree[i].pcount.end()) {
298                                 u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA];
299                         }else { u = 0.00; }
300                 
301                                                 
302                         //does this node have descendants from group l
303                         it = t->tree[i].pcount.find(groupB);
304                         //if it does subtract their percentage from u
305                         if (it != t->tree[i].pcount.end()) {
306                                 u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB];
307                         }
308                                                 
309                         u = abs(u * t->tree[i].getBranchLength());
310                                         
311                         //save groupcombs u value
312                         WScore[(groupA+groupB)] += u;
313                 }
314                 
315                 /********************************************************/
316                 
317                 //calculate weighted score for the group combination
318                 double UN;      
319                 UN = (WScore[(groupA+groupB)] / D);
320                 
321                 if (isnan(UN) || isinf(UN)) { UN = 0; } 
322                 data.push_back(UN);
323                                 
324                 return data; 
325         }
326         catch(exception& e) {
327                 m->errorOut(e, "Weighted", "getValues");
328                 exit(1);
329         }
330 }
331 /**************************************************************************************************/
332 vector<double> Weighted::getBranchLengthSums(Tree* t) { 
333         try {
334                 
335                 vector<double> sums;
336                 
337                 for(int v=0;v<t->getNumLeaves();v++){
338                         
339                         if (m->control_pressed) { return sums; }
340                                 
341                         int index = v;
342                         double sum = 0.0000;
343         
344                         //while you aren't at root
345                         while(t->tree[index].getParent() != -1){
346                                                 
347                                 //if you have a BL
348                                 if(t->tree[index].getBranchLength() != -1){
349                                         sum += abs(t->tree[index].getBranchLength());
350                                 }
351                                 index = t->tree[index].getParent();
352                         }
353                                         
354                         //get last breanch length added
355                         if(t->tree[index].getBranchLength() != -1){
356                                 sum += abs(t->tree[index].getBranchLength());
357                         }
358                         
359                         sums.push_back(sum);
360                 }
361                 
362                 return sums;
363         }
364         catch(exception& e) {
365                 m->errorOut(e, "Weighted", "getBranchLengthSums");
366                 exit(1);
367         }
368 }
369 /**************************************************************************************************/
370
371
372