]> git.donarmstrong.com Git - mothur.git/blob - weighted.cpp
fixed order of unifrac commands so that it matches the summary commands
[mothur.git] / weighted.cpp
1 /*
2  *  weighted.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "weighted.h"
11
12 /**************************************************************************************************/
13
14 EstOutput Weighted::getValues(Tree* t, int p, string o) {
15     try {
16                 globaldata = GlobalData::getInstance();
17                 
18                 data.clear(); //clear out old values
19                 int numGroups;
20                 vector<double> D;
21                 processors = p;
22                 outputDir = o;
23                 
24                 numGroups = globaldata->Groups.size();
25         
26                 vector<double> sums = getBranchLengthSums(t);
27                 
28                 if (m->control_pressed) { return data; }
29                 
30                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
31                 vector< vector<string> > namesOfGroupCombos;
32                 for (int i=0; i<numGroups; i++) { 
33                         for (int l = 0; l < i; l++) {   
34                                 //initialize weighted scores
35                                 //WScore[globaldata->Groups[i]+globaldata->Groups[l]] = 0.0;
36                                 vector<string> groups; groups.push_back(globaldata->Groups[i]); groups.push_back(globaldata->Groups[l]);
37                                 namesOfGroupCombos.push_back(groups);
38                         }
39                 }
40                 
41                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
42                         if(processors == 1){
43                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), sums);
44                         }else{
45                                 int numPairs = namesOfGroupCombos.size();
46                                 
47                                 int numPairsPerProcessor = numPairs / processors;
48                                 
49                                 for (int i = 0; i < processors; i++) {
50                                         int startPos = i * numPairsPerProcessor;
51                                         if(i == processors - 1){
52                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
53                                         }
54                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
55                                 }
56
57                                 data = createProcesses(t, namesOfGroupCombos, sums);
58                                 
59                                 lines.clear();
60                         }
61                 #else
62                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), sums);
63                 #endif
64                 
65                 return data;
66         }
67         catch(exception& e) {
68                 m->errorOut(e, "Weighted", "getValues");
69                 exit(1);
70         }
71 }
72 /**************************************************************************************************/
73
74 EstOutput Weighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector<double>& sums) {
75         try {
76 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
77                 int process = 1;
78                 int num = 0;
79                 vector<int> processIDS;
80                 
81                 EstOutput results;
82                 
83                 //loop through and create all the processes you want
84                 while (process != processors) {
85                         int pid = fork();
86                         
87                         if (pid > 0) {
88                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
89                                 process++;
90                         }else if (pid == 0){
91         
92                                 EstOutput Myresults;
93                                 Myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, sums);
94                         
95                                 m->mothurOut("Merging results."); m->mothurOutEndLine();
96                                 
97                                 //pass numSeqs to parent
98                                 ofstream out;
99
100                                 string tempFile = outputDir + toString(getpid()) + ".weighted.results.temp";
101         
102                                 m->openOutputFile(tempFile, out);
103         
104                                 out << Myresults.size() << endl;
105                                 for (int i = 0; i < Myresults.size(); i++) {  out << Myresults[i] << '\t';  } out << endl;
106                                 out.close();
107                                 
108                                 exit(0);
109                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
110                 }
111         
112                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, sums);
113         
114                 //force parent to wait until all the processes are done
115                 for (int i=0;i<(processors-1);i++) { 
116                         int temp = processIDS[i];
117                         wait(&temp);
118                 }
119         
120                 if (m->control_pressed) { return results; }
121                 
122                 //get data created by processes
123                 for (int i=0;i<(processors-1);i++) { 
124                         ifstream in;
125                         string s = outputDir + toString(processIDS[i]) + ".weighted.results.temp";
126                         m->openInputFile(s, in);
127                         
128                         //get quantiles
129                         while (!in.eof()) {
130                                 int num;
131                                 in >> num; m->gobble(in);
132                                 
133                                 if (m->control_pressed) { break; }
134
135                                 double w; 
136                                 for (int j = 0; j < num; j++) {
137                                         in >> w;
138                                         results.push_back(w);
139                                 }
140                                 m->gobble(in);
141                         }
142                         in.close();
143                         remove(s.c_str());
144                 }
145                 
146                 m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine();
147                 
148                 return results;
149 #endif          
150         }
151         catch(exception& e) {
152                 m->errorOut(e, "Weighted", "createProcesses");
153                 exit(1);
154         }
155 }
156 /**************************************************************************************************/
157 EstOutput Weighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector<double>& sums) { 
158  try {
159                 EstOutput results;
160                 vector<double> D;
161                 
162                 int count = 0;
163                 for (int h = start; h < (start+num); h++) {
164                 
165                         if (m->control_pressed) { return results; }
166                 
167                         //initialize weighted score
168                         string groupA = namesOfGroupCombos[h][0]; 
169                         string groupB = namesOfGroupCombos[h][1];
170                         
171                         WScore[groupA+groupB] = 0.0;
172                         D.push_back(0.0000); //initialize a spot in D for each combination
173                         
174                         //adding the wieghted sums from group i
175                         for (int j = 0; j < t->groupNodeInfo[groupA].size(); j++) { //the leaf nodes that have seqs from group i
176                                 map<string, int>::iterator it = t->tree[t->groupNodeInfo[groupA][j]].pcount.find(groupA);
177                                 int numSeqsInGroupI = it->second;
178                                 
179                                 double weightedSum = ((numSeqsInGroupI * sums[t->groupNodeInfo[groupA][j]]) / (double)tmap->seqsPerGroup[groupA]);
180                         
181                                 D[count] += weightedSum;
182                         }
183                         
184                         //adding the wieghted sums from group l
185                         for (int j = 0; j < t->groupNodeInfo[groupB].size(); j++) { //the leaf nodes that have seqs from group l
186                                 map<string, int>::iterator it = t->tree[t->groupNodeInfo[groupB][j]].pcount.find(groupB);
187                                 int numSeqsInGroupL = it->second;
188                                 
189                                 double weightedSum = ((numSeqsInGroupL * sums[t->groupNodeInfo[groupB][j]]) / (double)tmap->seqsPerGroup[groupB]);
190                         
191                                 D[count] += weightedSum;
192                         }
193                         count++;
194                 }
195                 
196                 //calculate u for the group comb 
197                 int total = t->getNumNodes();
198                 int twentyPercent = (total * 0.20);
199                 
200                 for(int i=0;i<t->getNumNodes();i++){
201                         for (int h = start; h < (start+num); h++) {
202                                 
203                                 string groupA = namesOfGroupCombos[h][0]; 
204                                 string groupB = namesOfGroupCombos[h][1];
205
206                                 if (m->control_pressed) { return results; }
207                                 
208                                 double u;
209                                 //does this node have descendants from groupA
210                                 it = t->tree[i].pcount.find(groupA);
211                                 //if it does u = # of its descendants with a certain group / total number in tree with a certain group
212                                 if (it != t->tree[i].pcount.end()) {
213                                         u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA];
214                                 }else { u = 0.00; }
215                         
216                                                         
217                                 //does this node have descendants from group l
218                                 it = t->tree[i].pcount.find(groupB);
219                                 //if it does subtract their percentage from u
220                                 if (it != t->tree[i].pcount.end()) {
221                                         u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB];
222                                 }
223                                                         
224                                 u = abs(u * t->tree[i].getBranchLength());
225                                                 
226                                 //save groupcombs u value
227                                 WScore[(groupA+groupB)] += u;
228                         }
229                         //report progress
230                         if((i) % twentyPercent == 0){   m->mothurOut("Percentage complete: " + toString(int((i / (float)total) * 100.0))); m->mothurOutEndLine();               }
231                 }
232                 
233                 m->mothurOut("Percentage complete: 100"); m->mothurOutEndLine();
234                 
235                 /********************************************************/
236                 //calculate weighted score for the group combination
237                 double UN;      
238                 count = 0;
239                 for (int h = start; h < (start+num); h++) {
240                         UN = (WScore[namesOfGroupCombos[h][0]+namesOfGroupCombos[h][1]] / D[count]);
241                 
242                         if (isnan(UN) || isinf(UN)) { UN = 0; } 
243                         results.push_back(UN);
244                         count++;
245                 }
246                                 
247                 return results; 
248         }
249         catch(exception& e) {
250                 m->errorOut(e, "Weighted", "driver");
251                 exit(1);
252         }
253 }
254 /**************************************************************************************************/
255 EstOutput Weighted::getValues(Tree* t, string groupA, string groupB, vector<double>& sums) { 
256  try {
257                 globaldata = GlobalData::getInstance();
258                 
259                 data.clear(); //clear out old values
260                 
261                 if (m->control_pressed) { return data; }
262                 
263                 //initialize weighted score
264                 WScore[(groupA+groupB)] = 0.0;
265                 double D = 0.0;
266                 
267                 vector<string> groups; groups.push_back(groupA); groups.push_back(groupB);
268                 
269                 //adding the wieghted sums from group i
270                 for (int j = 0; j < t->groupNodeInfo[groups[0]].size(); j++) { //the leaf nodes that have seqs from group i
271                         map<string, int>::iterator it = t->tree[t->groupNodeInfo[groups[0]][j]].pcount.find(groups[0]);
272                         int numSeqsInGroupI = it->second;
273                         
274                         double weightedSum = ((numSeqsInGroupI * sums[t->groupNodeInfo[groups[0]][j]]) / (double)tmap->seqsPerGroup[groups[0]]);
275                 
276                         D += weightedSum;
277                 }
278                 
279                 //adding the wieghted sums from group l
280                 for (int j = 0; j < t->groupNodeInfo[groups[1]].size(); j++) { //the leaf nodes that have seqs from group l
281                         map<string, int>::iterator it = t->tree[t->groupNodeInfo[groups[1]][j]].pcount.find(groups[1]);
282                         int numSeqsInGroupL = it->second;
283                         
284                         double weightedSum = ((numSeqsInGroupL * sums[t->groupNodeInfo[groups[1]][j]]) / (double)tmap->seqsPerGroup[groups[1]]);
285                 
286                         D += weightedSum;
287                 }
288                 
289                 //calculate u for the group comb 
290                 for(int i=0;i<t->getNumNodes();i++){
291                 
292                         if (m->control_pressed) { return data; }
293                         
294                         double u;
295                         //does this node have descendants from groupA
296                         it = t->tree[i].pcount.find(groupA);
297                         //if it does u = # of its descendants with a certain group / total number in tree with a certain group
298                         if (it != t->tree[i].pcount.end()) {
299                                 u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA];
300                         }else { u = 0.00; }
301                 
302                                                 
303                         //does this node have descendants from group l
304                         it = t->tree[i].pcount.find(groupB);
305                         //if it does subtract their percentage from u
306                         if (it != t->tree[i].pcount.end()) {
307                                 u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB];
308                         }
309                                                 
310                         u = abs(u * t->tree[i].getBranchLength());
311                                         
312                         //save groupcombs u value
313                         WScore[(groupA+groupB)] += u;
314                 }
315                 
316                 /********************************************************/
317                 
318                 //calculate weighted score for the group combination
319                 double UN;      
320                 UN = (WScore[(groupA+groupB)] / D);
321                 
322                 if (isnan(UN) || isinf(UN)) { UN = 0; } 
323                 data.push_back(UN);
324                                 
325                 return data; 
326         }
327         catch(exception& e) {
328                 m->errorOut(e, "Weighted", "getValues");
329                 exit(1);
330         }
331 }
332 /**************************************************************************************************/
333 vector<double> Weighted::getBranchLengthSums(Tree* t) { 
334         try {
335                 
336                 vector<double> sums;
337                 
338                 for(int v=0;v<t->getNumLeaves();v++){
339                         
340                         if (m->control_pressed) { return sums; }
341                                 
342                         int index = v;
343                         double sum = 0.0000;
344         
345                         //while you aren't at root
346                         while(t->tree[index].getParent() != -1){
347                                                 
348                                 //if you have a BL
349                                 if(t->tree[index].getBranchLength() != -1){
350                                         sum += abs(t->tree[index].getBranchLength());
351                                 }
352                                 index = t->tree[index].getParent();
353                         }
354                                         
355                         //get last breanch length added
356                         if(t->tree[index].getBranchLength() != -1){
357                                 sum += abs(t->tree[index].getBranchLength());
358                         }
359                         
360                         sums.push_back(sum);
361                 }
362                 
363                 return sums;
364         }
365         catch(exception& e) {
366                 m->errorOut(e, "Weighted", "getBranchLengthSums");
367                 exit(1);
368         }
369 }
370 /**************************************************************************************************/
371
372
373