]> git.donarmstrong.com Git - mothur.git/blob - weighted.cpp
working on parallelizing unifrac.weighted.
[mothur.git] / weighted.cpp
1 /*
2  *  weighted.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "weighted.h"
11
12 /**************************************************************************************************/
13
14 EstOutput Weighted::getValues(Tree* t, int p, string o) {
15     try {
16                 globaldata = GlobalData::getInstance();
17                 
18                 data.clear(); //clear out old values
19                 int numGroups;
20                 vector<double> D;
21                 processors = p;
22                 outputDir = o;
23                 
24                 numGroups = globaldata->Groups.size();
25         
26                 vector<double> sums = getBranchLengthSums(t);
27                 
28                 if (m->control_pressed) { return data; }
29                 
30                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
31                 vector< vector<string> > namesOfGroupCombos;
32                 for (int i=0; i<numGroups; i++) { 
33                         for (int l = i+1; l < numGroups; l++) { 
34                                 //initialize weighted scores
35                                 //WScore[globaldata->Groups[i]+globaldata->Groups[l]] = 0.0;
36                                 vector<string> groups; groups.push_back(globaldata->Groups[i]); groups.push_back(globaldata->Groups[l]);
37                                 namesOfGroupCombos.push_back(groups);
38                         }
39                 }
40                 
41                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
42                         if(processors == 1){
43                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), sums);
44                         }else{
45                                 int numPairs = namesOfGroupCombos.size();
46                                 
47                                 int numPairsPerProcessor = numPairs / processors;
48                                 
49                                 for (int i = 0; i < processors; i++) {
50                                         int startPos = i * numPairsPerProcessor;
51                                         if(i == processors - 1){
52                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
53                                         }
54                                         lines.push_back(new linePair(startPos, numPairsPerProcessor));
55                                 }
56
57                                 data = createProcesses(t, namesOfGroupCombos, sums);
58                                 
59                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
60                         }
61                 #else
62                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), sums);
63                 #endif
64                 
65                 return data;
66         }
67         catch(exception& e) {
68                 m->errorOut(e, "Weighted", "getValues");
69                 exit(1);
70         }
71 }
72 /**************************************************************************************************/
73
74 EstOutput Weighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector<double>& sums) {
75         try {
76 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
77                 int process = 0;
78                 int num = 0;
79                 vector<int> processIDS;
80                 
81                 EstOutput results;
82                 
83                 //loop through and create all the processes you want
84                 while (process != processors) {
85                         int pid = fork();
86                         
87                         if (pid > 0) {
88                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
89                                 process++;
90                         }else if (pid == 0){
91                                 results = driver(t, namesOfGroupCombos, lines[process]->start, lines[process]->num, sums);
92                                 
93                                 if (m->control_pressed) { exit(0); }
94                                 
95                                 m->mothurOut("Merging results."); m->mothurOutEndLine();
96                                 
97                                 //pass numSeqs to parent
98                                 ofstream out;
99                                 string tempFile = outputDir + toString(getpid()) + ".results.temp";
100                                 m->openOutputFile(tempFile, out);
101                                 out << results.size() << endl;
102                                 for (int i = 0; i < results.size(); i++) {  out << results[i] << '\t';  } out << endl;
103                                 out.close();
104                                 
105                                 exit(0);
106                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
107                 }
108                 
109                 //force parent to wait until all the processes are done
110                 for (int i=0;i<processors;i++) { 
111                         int temp = processIDS[i];
112                         wait(&temp);
113                 }
114                 
115                 if (m->control_pressed) { return results; }
116                 
117                 //get data created by processes
118                 for (int i=0;i<processors;i++) { 
119                         ifstream in;
120                         string s = outputDir + toString(processIDS[i]) + ".results.temp";
121                         m->openInputFile(s, in);
122                         
123                         vector<double> r;
124                         
125                         //get quantiles
126                         while (!in.eof()) {
127                                 int num;
128                                 in >> num; 
129                                 
130                                 if (m->control_pressed) { break; }
131                                 
132                                 m->gobble(in);
133
134                                 double w; 
135                                 for (int j = 0; j < num; j++) {
136                                         in >> w;
137                                         r.push_back(w);
138                                 }
139                                 m->gobble(in);
140                         }
141                         in.close();
142                         remove(s.c_str());
143         
144                         //save quan in quantiles
145                         for (int j = 0; j < r.size(); j++) {
146                                 //put all values of r into results
147                                 results.push_back(r[j]);   
148                         }
149                 }
150                 
151                 m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine();
152                 
153                 return results;
154 #endif          
155         }
156         catch(exception& e) {
157                 m->errorOut(e, "Weighted", "createProcesses");
158                 exit(1);
159         }
160 }
161 /**************************************************************************************************/
162 EstOutput Weighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector<double>& sums) { 
163  try {
164                 globaldata = GlobalData::getInstance();
165                 
166                 EstOutput results;
167                 vector<double> D;
168                 
169                 int count = 0;
170                 int total = start+num;
171                 int twentyPercent = (total * 0.20);
172
173                 for (int h = start; h < (start+num); h++) {
174                 
175                         if (m->control_pressed) { return results; }
176                 
177                         //initialize weighted score
178                         string groupA = namesOfGroupCombos[h][0]; 
179                         string groupB = namesOfGroupCombos[h][1];
180                         
181                         WScore[groupA+groupB] = 0.0;
182                         D.push_back(0.0000); //initialize a spot in D for each combination
183                         
184                         //adding the wieghted sums from group i
185                         for (int j = 0; j < t->groupNodeInfo[groupA].size(); j++) { //the leaf nodes that have seqs from group i
186                                 map<string, int>::iterator it = t->tree[t->groupNodeInfo[groupA][j]].pcount.find(groupA);
187                                 int numSeqsInGroupI = it->second;
188                                 
189                                 double weightedSum = ((numSeqsInGroupI * sums[t->groupNodeInfo[groupA][j]]) / (double)tmap->seqsPerGroup[groupA]);
190                         
191                                 D[count] += weightedSum;
192                         }
193                         
194                         //adding the wieghted sums from group l
195                         for (int j = 0; j < t->groupNodeInfo[groupB].size(); j++) { //the leaf nodes that have seqs from group l
196                                 map<string, int>::iterator it = t->tree[t->groupNodeInfo[groupB][j]].pcount.find(groupB);
197                                 int numSeqsInGroupL = it->second;
198                                 
199                                 double weightedSum = ((numSeqsInGroupL * sums[t->groupNodeInfo[groupB][j]]) / (double)tmap->seqsPerGroup[groupB]);
200                         
201                                 D[count] += weightedSum;
202                         }
203                         count++;
204                         
205                         //report progress
206                         if((h) % twentyPercent == 0){   m->mothurOut("Percentage complete: " + toString(int((h / (float)total) * 100.0))); m->mothurOutEndLine();               }
207                 }
208                 
209                 m->mothurOut("Percentage complete: 100"); m->mothurOutEndLine();
210                 
211                 //calculate u for the group comb 
212                 for(int i=0;i<t->getNumNodes();i++){
213                         for (int h = start; h < (start+num); h++) {
214                                 
215                                 string groupA = namesOfGroupCombos[h][0]; 
216                                 string groupB = namesOfGroupCombos[h][1];
217
218                                 if (m->control_pressed) { return results; }
219                                 
220                                 double u;
221                                 //does this node have descendants from groupA
222                                 it = t->tree[i].pcount.find(groupA);
223                                 //if it does u = # of its descendants with a certain group / total number in tree with a certain group
224                                 if (it != t->tree[i].pcount.end()) {
225                                         u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA];
226                                 }else { u = 0.00; }
227                         
228                                                         
229                                 //does this node have descendants from group l
230                                 it = t->tree[i].pcount.find(groupB);
231                                 //if it does subtract their percentage from u
232                                 if (it != t->tree[i].pcount.end()) {
233                                         u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB];
234                                 }
235                                                         
236                                 u = abs(u * t->tree[i].getBranchLength());
237                                                 
238                                 //save groupcombs u value
239                                 WScore[(groupA+groupB)] += u;
240                         }
241                 }
242                 
243                 /********************************************************/
244                 
245                 //calculate weighted score for the group combination
246                 double UN;      
247                 count = 0;
248                 for (int h = start; h < (start+num); h++) {
249                         UN = (WScore[namesOfGroupCombos[h][0]+namesOfGroupCombos[h][1]] / D[count]);
250                 
251                         if (isnan(UN) || isinf(UN)) { UN = 0; } 
252                         results.push_back(UN);
253                         count++;
254                 }
255                                 
256                 return results; 
257         }
258         catch(exception& e) {
259                 m->errorOut(e, "Weighted", "driver");
260                 exit(1);
261         }
262 }
263 /**************************************************************************************************/
264 EstOutput Weighted::getValues(Tree* t, string groupA, string groupB, vector<double>& sums) { 
265  try {
266                 globaldata = GlobalData::getInstance();
267                 
268                 data.clear(); //clear out old values
269                 
270                 if (m->control_pressed) { return data; }
271                 
272                 //initialize weighted score
273                 WScore[(groupA+groupB)] = 0.0;
274                 double D = 0.0;
275                 
276                 vector<string> groups; groups.push_back(groupA); groups.push_back(groupB);
277                 
278                 //adding the wieghted sums from group i
279                 for (int j = 0; j < t->groupNodeInfo[groups[0]].size(); j++) { //the leaf nodes that have seqs from group i
280                         map<string, int>::iterator it = t->tree[t->groupNodeInfo[groups[0]][j]].pcount.find(groups[0]);
281                         int numSeqsInGroupI = it->second;
282                         
283                         double weightedSum = ((numSeqsInGroupI * sums[t->groupNodeInfo[groups[0]][j]]) / (double)tmap->seqsPerGroup[groups[0]]);
284                 
285                         D += weightedSum;
286                 }
287                 
288                 //adding the wieghted sums from group l
289                 for (int j = 0; j < t->groupNodeInfo[groups[1]].size(); j++) { //the leaf nodes that have seqs from group l
290                         map<string, int>::iterator it = t->tree[t->groupNodeInfo[groups[1]][j]].pcount.find(groups[1]);
291                         int numSeqsInGroupL = it->second;
292                         
293                         double weightedSum = ((numSeqsInGroupL * sums[t->groupNodeInfo[groups[1]][j]]) / (double)tmap->seqsPerGroup[groups[1]]);
294                 
295                         D += weightedSum;
296                 }
297                 
298                 //calculate u for the group comb 
299                 for(int i=0;i<t->getNumNodes();i++){
300                 
301                         if (m->control_pressed) { return data; }
302                         
303                         double u;
304                         //does this node have descendants from groupA
305                         it = t->tree[i].pcount.find(groupA);
306                         //if it does u = # of its descendants with a certain group / total number in tree with a certain group
307                         if (it != t->tree[i].pcount.end()) {
308                                 u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA];
309                         }else { u = 0.00; }
310                 
311                                                 
312                         //does this node have descendants from group l
313                         it = t->tree[i].pcount.find(groupB);
314                         //if it does subtract their percentage from u
315                         if (it != t->tree[i].pcount.end()) {
316                                 u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB];
317                         }
318                                                 
319                         u = abs(u * t->tree[i].getBranchLength());
320                                         
321                         //save groupcombs u value
322                         WScore[(groupA+groupB)] += u;
323                 }
324                 
325                 /********************************************************/
326                 
327                 //calculate weighted score for the group combination
328                 double UN;      
329                 UN = (WScore[(groupA+groupB)] / D);
330                 
331                 if (isnan(UN) || isinf(UN)) { UN = 0; } 
332                 data.push_back(UN);
333                                 
334                 return data; 
335         }
336         catch(exception& e) {
337                 m->errorOut(e, "Weighted", "getValues");
338                 exit(1);
339         }
340 }
341 /**************************************************************************************************/
342 vector<double> Weighted::getBranchLengthSums(Tree* t) { 
343         try {
344                 
345                 vector<double> sums;
346                 
347                 for(int v=0;v<t->getNumLeaves();v++){
348                         
349                         if (m->control_pressed) { return sums; }
350                                 
351                         int index = v;
352                         double sum = 0.0000;
353         
354                         //while you aren't at root
355                         while(t->tree[index].getParent() != -1){
356                                                 
357                                 //if you have a BL
358                                 if(t->tree[index].getBranchLength() != -1){
359                                         sum += abs(t->tree[index].getBranchLength());
360                                 }
361                                 index = t->tree[index].getParent();
362                         }
363                                         
364                         //get last breanch length added
365                         if(t->tree[index].getBranchLength() != -1){
366                                 sum += abs(t->tree[index].getBranchLength());
367                         }
368                         
369                         sums.push_back(sum);
370                 }
371                 
372                 return sums;
373         }
374         catch(exception& e) {
375                 m->errorOut(e, "Weighted", "getBranchLengthSums");
376                 exit(1);
377         }
378 }
379 /**************************************************************************************************/
380
381
382