]> git.donarmstrong.com Git - mothur.git/blob - weighted.cpp
c9ad81295aaacb3bd4ae7eb8f7e8ca7c1cef9b80
[mothur.git] / weighted.cpp
1 /*
2  *  weighted.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "weighted.h"
11
12 /**************************************************************************************************/
13
14 EstOutput Weighted::getValues(Tree* t, int p, string o) {
15     try {
16                 globaldata = GlobalData::getInstance();
17                 
18                 data.clear(); //clear out old values
19                 int numGroups;
20                 vector<double> D;
21                 processors = p;
22                 outputDir = o;
23                 
24                 numGroups = globaldata->Groups.size();
25         
26                 vector<double> sums = getBranchLengthSums(t);
27                 
28                 if (m->control_pressed) { return data; }
29                 
30                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
31                 vector< vector<string> > namesOfGroupCombos;
32                 for (int i=0; i<numGroups; i++) { 
33                         for (int l = i+1; l < numGroups; l++) { 
34                                 //initialize weighted scores
35                                 //WScore[globaldata->Groups[i]+globaldata->Groups[l]] = 0.0;
36                                 vector<string> groups; groups.push_back(globaldata->Groups[i]); groups.push_back(globaldata->Groups[l]);
37                                 namesOfGroupCombos.push_back(groups);
38                         }
39                 }
40                 
41                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
42                         if(processors == 1){
43                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), sums);
44                         }else{
45                                 int numPairs = namesOfGroupCombos.size();
46                                 
47                                 int numPairsPerProcessor = numPairs / processors;
48                                 
49                                 for (int i = 0; i < processors; i++) {
50                                         int startPos = i * numPairsPerProcessor;
51                                         if(i == processors - 1){
52                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
53                                         }
54                                         lines.push_back(new linePair(startPos, numPairsPerProcessor));
55                                 }
56
57                                 data = createProcesses(t, namesOfGroupCombos, sums);
58                                 
59                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
60                         }
61                 #else
62                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), sums);
63                 #endif
64                 
65                 return data;
66         }
67         catch(exception& e) {
68                 m->errorOut(e, "Weighted", "getValues");
69                 exit(1);
70         }
71 }
72 /**************************************************************************************************/
73
74 EstOutput Weighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector<double>& sums) {
75         try {
76 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
77                 int process = 1;
78                 int num = 0;
79                 vector<int> processIDS;
80                 
81                 EstOutput results;
82                 
83                 //loop through and create all the processes you want
84                 while (process != processors) {
85                         int pid = fork();
86                         
87                         if (pid > 0) {
88                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
89                                 process++;
90                         }else if (pid == 0){
91                                 EstOutput Myresults;
92                                 Myresults = driver(t, namesOfGroupCombos, lines[process]->start, lines[process]->num, sums);
93                                 
94                                 if (m->control_pressed) { exit(0); }
95                                 
96                                 m->mothurOut("Merging results."); m->mothurOutEndLine();
97                                 
98                                 //pass numSeqs to parent
99                                 ofstream out;
100                                 string tempFile = outputDir + toString(getpid()) + ".weighted.results.temp";
101                                 m->openOutputFile(tempFile, out);
102                                 out << Myresults.size() << endl;
103                                 for (int i = 0; i < Myresults.size(); i++) {  out << Myresults[i] << '\t';  } out << endl;
104                                 out.close();
105                                 
106                                 exit(0);
107                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
108                 }
109         
110                 results = driver(t, namesOfGroupCombos, lines[0]->start, lines[0]->num, sums);
111                 
112                 //force parent to wait until all the processes are done
113                 for (int i=0;i<(processors-1);i++) { 
114                         int temp = processIDS[i];
115                         wait(&temp);
116                 }
117                 
118                 if (m->control_pressed) { return results; }
119                 
120                 //get data created by processes
121                 for (int i=0;i<(processors-1);i++) { 
122                         ifstream in;
123                         string s = outputDir + toString(processIDS[i]) + ".weighted.results.temp";
124                         m->openInputFile(s, in);
125                         
126                         //get quantiles
127                         while (!in.eof()) {
128                                 int num;
129                                 in >> num; m->gobble(in);
130                                 
131                                 if (m->control_pressed) { break; }
132
133                                 double w; 
134                                 for (int j = 0; j < num; j++) {
135                                         in >> w;
136                                         results.push_back(w);
137                                 }
138                                 m->gobble(in);
139                         }
140                         in.close();
141                         remove(s.c_str());
142                 }
143                 
144                 m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine();
145                 
146                 return results;
147 #endif          
148         }
149         catch(exception& e) {
150                 m->errorOut(e, "Weighted", "createProcesses");
151                 exit(1);
152         }
153 }
154 /**************************************************************************************************/
155 EstOutput Weighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector<double>& sums) { 
156  try {
157                 globaldata = GlobalData::getInstance();
158                 
159                 EstOutput results;
160                 vector<double> D;
161                 
162                 int count = 0;
163                 int total = num;
164                 int twentyPercent = (total * 0.20);
165
166                 for (int h = start; h < (start+num); h++) {
167                 
168                         if (m->control_pressed) { return results; }
169                 
170                         //initialize weighted score
171                         string groupA = namesOfGroupCombos[h][0]; 
172                         string groupB = namesOfGroupCombos[h][1];
173                         
174                         WScore[groupA+groupB] = 0.0;
175                         D.push_back(0.0000); //initialize a spot in D for each combination
176                         
177                         //adding the wieghted sums from group i
178                         for (int j = 0; j < t->groupNodeInfo[groupA].size(); j++) { //the leaf nodes that have seqs from group i
179                                 map<string, int>::iterator it = t->tree[t->groupNodeInfo[groupA][j]].pcount.find(groupA);
180                                 int numSeqsInGroupI = it->second;
181                                 
182                                 double weightedSum = ((numSeqsInGroupI * sums[t->groupNodeInfo[groupA][j]]) / (double)tmap->seqsPerGroup[groupA]);
183                         
184                                 D[count] += weightedSum;
185                         }
186                         
187                         //adding the wieghted sums from group l
188                         for (int j = 0; j < t->groupNodeInfo[groupB].size(); j++) { //the leaf nodes that have seqs from group l
189                                 map<string, int>::iterator it = t->tree[t->groupNodeInfo[groupB][j]].pcount.find(groupB);
190                                 int numSeqsInGroupL = it->second;
191                                 
192                                 double weightedSum = ((numSeqsInGroupL * sums[t->groupNodeInfo[groupB][j]]) / (double)tmap->seqsPerGroup[groupB]);
193                         
194                                 D[count] += weightedSum;
195                         }
196                         count++;
197                         
198                         //report progress
199                         if((count) % twentyPercent == 0){       m->mothurOut("Percentage complete: " + toString(int((count / (float)total) * 100.0))); m->mothurOutEndLine();           }
200                 }
201                 
202                 m->mothurOut("Percentage complete: 100"); m->mothurOutEndLine();
203                 
204                 //calculate u for the group comb 
205                 for(int i=0;i<t->getNumNodes();i++){
206                         for (int h = start; h < (start+num); h++) {
207                                 
208                                 string groupA = namesOfGroupCombos[h][0]; 
209                                 string groupB = namesOfGroupCombos[h][1];
210
211                                 if (m->control_pressed) { return results; }
212                                 
213                                 double u;
214                                 //does this node have descendants from groupA
215                                 it = t->tree[i].pcount.find(groupA);
216                                 //if it does u = # of its descendants with a certain group / total number in tree with a certain group
217                                 if (it != t->tree[i].pcount.end()) {
218                                         u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA];
219                                 }else { u = 0.00; }
220                         
221                                                         
222                                 //does this node have descendants from group l
223                                 it = t->tree[i].pcount.find(groupB);
224                                 //if it does subtract their percentage from u
225                                 if (it != t->tree[i].pcount.end()) {
226                                         u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB];
227                                 }
228                                                         
229                                 u = abs(u * t->tree[i].getBranchLength());
230                                                 
231                                 //save groupcombs u value
232                                 WScore[(groupA+groupB)] += u;
233                         }
234                 }
235                 
236                 /********************************************************/
237                 
238                 //calculate weighted score for the group combination
239                 double UN;      
240                 count = 0;
241                 for (int h = start; h < (start+num); h++) {
242                         UN = (WScore[namesOfGroupCombos[h][0]+namesOfGroupCombos[h][1]] / D[count]);
243                 
244                         if (isnan(UN) || isinf(UN)) { UN = 0; } 
245                         results.push_back(UN);
246                         count++;
247                 }
248                                 
249                 return results; 
250         }
251         catch(exception& e) {
252                 m->errorOut(e, "Weighted", "driver");
253                 exit(1);
254         }
255 }
256 /**************************************************************************************************/
257 EstOutput Weighted::getValues(Tree* t, string groupA, string groupB, vector<double>& sums) { 
258  try {
259                 globaldata = GlobalData::getInstance();
260                 
261                 data.clear(); //clear out old values
262                 
263                 if (m->control_pressed) { return data; }
264                 
265                 //initialize weighted score
266                 WScore[(groupA+groupB)] = 0.0;
267                 double D = 0.0;
268                 
269                 vector<string> groups; groups.push_back(groupA); groups.push_back(groupB);
270                 
271                 //adding the wieghted sums from group i
272                 for (int j = 0; j < t->groupNodeInfo[groups[0]].size(); j++) { //the leaf nodes that have seqs from group i
273                         map<string, int>::iterator it = t->tree[t->groupNodeInfo[groups[0]][j]].pcount.find(groups[0]);
274                         int numSeqsInGroupI = it->second;
275                         
276                         double weightedSum = ((numSeqsInGroupI * sums[t->groupNodeInfo[groups[0]][j]]) / (double)tmap->seqsPerGroup[groups[0]]);
277                 
278                         D += weightedSum;
279                 }
280                 
281                 //adding the wieghted sums from group l
282                 for (int j = 0; j < t->groupNodeInfo[groups[1]].size(); j++) { //the leaf nodes that have seqs from group l
283                         map<string, int>::iterator it = t->tree[t->groupNodeInfo[groups[1]][j]].pcount.find(groups[1]);
284                         int numSeqsInGroupL = it->second;
285                         
286                         double weightedSum = ((numSeqsInGroupL * sums[t->groupNodeInfo[groups[1]][j]]) / (double)tmap->seqsPerGroup[groups[1]]);
287                 
288                         D += weightedSum;
289                 }
290                 
291                 //calculate u for the group comb 
292                 for(int i=0;i<t->getNumNodes();i++){
293                 
294                         if (m->control_pressed) { return data; }
295                         
296                         double u;
297                         //does this node have descendants from groupA
298                         it = t->tree[i].pcount.find(groupA);
299                         //if it does u = # of its descendants with a certain group / total number in tree with a certain group
300                         if (it != t->tree[i].pcount.end()) {
301                                 u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA];
302                         }else { u = 0.00; }
303                 
304                                                 
305                         //does this node have descendants from group l
306                         it = t->tree[i].pcount.find(groupB);
307                         //if it does subtract their percentage from u
308                         if (it != t->tree[i].pcount.end()) {
309                                 u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB];
310                         }
311                                                 
312                         u = abs(u * t->tree[i].getBranchLength());
313                                         
314                         //save groupcombs u value
315                         WScore[(groupA+groupB)] += u;
316                 }
317                 
318                 /********************************************************/
319                 
320                 //calculate weighted score for the group combination
321                 double UN;      
322                 UN = (WScore[(groupA+groupB)] / D);
323                 
324                 if (isnan(UN) || isinf(UN)) { UN = 0; } 
325                 data.push_back(UN);
326                                 
327                 return data; 
328         }
329         catch(exception& e) {
330                 m->errorOut(e, "Weighted", "getValues");
331                 exit(1);
332         }
333 }
334 /**************************************************************************************************/
335 vector<double> Weighted::getBranchLengthSums(Tree* t) { 
336         try {
337                 
338                 vector<double> sums;
339                 
340                 for(int v=0;v<t->getNumLeaves();v++){
341                         
342                         if (m->control_pressed) { return sums; }
343                                 
344                         int index = v;
345                         double sum = 0.0000;
346         
347                         //while you aren't at root
348                         while(t->tree[index].getParent() != -1){
349                                                 
350                                 //if you have a BL
351                                 if(t->tree[index].getBranchLength() != -1){
352                                         sum += abs(t->tree[index].getBranchLength());
353                                 }
354                                 index = t->tree[index].getParent();
355                         }
356                                         
357                         //get last breanch length added
358                         if(t->tree[index].getBranchLength() != -1){
359                                 sum += abs(t->tree[index].getBranchLength());
360                         }
361                         
362                         sums.push_back(sum);
363                 }
364                 
365                 return sums;
366         }
367         catch(exception& e) {
368                 m->errorOut(e, "Weighted", "getBranchLengthSums");
369                 exit(1);
370         }
371 }
372 /**************************************************************************************************/
373
374
375