]> git.donarmstrong.com Git - mothur.git/blob - unweighted.cpp
working on parallelizing unifrac.unweighted.
[mothur.git] / unweighted.cpp
1 /*
2  *  unweighted.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unweighted.h"
11
12 /**************************************************************************************************/
13
14 EstOutput Unweighted::getValues(Tree* t, int p, string o) {
15         try {
16                 globaldata = GlobalData::getInstance();
17                 processors = p;
18                 outputDir = o;
19                         
20                 //if the users enters no groups then give them the score of all groups
21                 int numGroups = globaldata->Groups.size();
22                 
23                 //calculate number of comparsions
24                 int numComp = 0;
25                 vector< vector<string> > namesOfGroupCombos;
26                 for (int r=0; r<numGroups; r++) { 
27                         for (int l = r+1; l < numGroups; l++) {
28                                 numComp++;
29                                 vector<string> groups; groups.push_back(globaldata->Groups[r]); groups.push_back(globaldata->Groups[l]);
30                                 namesOfGroupCombos.push_back(groups);
31                         }
32                 }
33                 
34                 if (numComp != 1) {
35                         vector<string> groups;
36                         if (numGroups == 0) {
37                                 //get score for all users groups
38                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
39                                         if (tmap->namesOfGroups[i] != "xxx") {
40                                                 groups.push_back(tmap->namesOfGroups[i]);
41                                         }
42                                 }
43                                 namesOfGroupCombos.push_back(groups);
44                         }else {
45                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
46                                         groups.push_back(globaldata->Groups[i]);
47                                 }
48                                 namesOfGroupCombos.push_back(groups);
49                         }
50                 }
51
52                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
53                         if(processors == 1){
54                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
55                         }else{
56                                 int numPairs = namesOfGroupCombos.size();
57                                 
58                                 int numPairsPerProcessor = numPairs / processors;
59                                 
60                                 for (int i = 0; i < processors; i++) {
61                                         int startPos = i * numPairsPerProcessor;
62                                         if(i == processors - 1){
63                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
64                                         }
65                                         lines.push_back(new linePair(startPos, numPairsPerProcessor));
66                                 }
67
68                                 data = createProcesses(t, namesOfGroupCombos);
69                                 
70                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
71                         }
72                 #else
73                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
74                 #endif
75                 
76                 return data;
77         }
78         catch(exception& e) {
79                 m->errorOut(e, "Unweighted", "getValues");
80                 exit(1);
81         }
82 }
83 /**************************************************************************************************/
84
85 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos) {
86         try {
87 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
88                 int process = 1;
89                 int num = 0;
90                 vector<int> processIDS;
91                 
92                 EstOutput results;
93                 
94                 //loop through and create all the processes you want
95                 while (process != processors) {
96                         int pid = fork();
97                         
98                         if (pid > 0) {
99                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
100                                 process++;
101                         }else if (pid == 0){
102                                 EstOutput myresults;
103                                 myresults = driver(t, namesOfGroupCombos, lines[process]->start, lines[process]->num);
104                                 
105                                 if (m->control_pressed) { exit(0); }
106                                 
107                                 m->mothurOut("Merging results."); m->mothurOutEndLine();
108                                 
109                                 //pass numSeqs to parent
110                                 ofstream out;
111                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
112                                 m->openOutputFile(tempFile, out);
113                                 out << myresults.size() << endl;
114                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
115                                 out.close();
116                                 
117                                 exit(0);
118                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
119                 }
120                 
121                 results = driver(t, namesOfGroupCombos, lines[0]->start, lines[0]->num);
122                 
123                 //force parent to wait until all the processes are done
124                 for (int i=0;i<(processors-1);i++) { 
125                         int temp = processIDS[i];
126                         wait(&temp);
127                 }
128                 
129                 if (m->control_pressed) { return results; }
130                 
131                 //get data created by processes
132                 for (int i=0;i<(processors-1);i++) { 
133                         ifstream in;
134                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
135                         m->openInputFile(s, in);
136                         
137                         //get quantiles
138                         if (!in.eof()) {
139                                 int num;
140                                 in >> num; m->gobble(in);
141                                 
142                                 if (m->control_pressed) { break; }
143                                 
144                                 double w; 
145                                 for (int j = 0; j < num; j++) {
146                                         in >> w;
147                                         results.push_back(w);
148                                 }
149                                 m->gobble(in);
150                         }
151                         in.close();
152                         remove(s.c_str());
153                 }
154                 
155                 m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine();
156                 
157                 return results;
158 #endif          
159         }
160         catch(exception& e) {
161                 m->errorOut(e, "Unweighted", "createProcesses");
162                 exit(1);
163         }
164 }
165 /**************************************************************************************************/
166 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num) { 
167  try {
168                 
169                 EstOutput results; results.resize(num);
170                 
171                 int count = 0;
172                 int total = num;
173                 int twentyPercent = (total * 0.20);
174
175                 for (int h = start; h < (start+num); h++) {
176                 
177                         if (m->control_pressed) { return results; }
178                 
179                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
180                         double totalBL = 0.00;  //all branch lengths
181                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
182                                 
183                         for(int i=0;i<t->getNumNodes();i++){
184                         
185                                 if (m->control_pressed) {  return data; }
186                                 
187                                 //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
188                                 //pcountSize = 2, not unique to one group
189                                 //pcountSize = 1, unique to one group
190                                 
191                                 int pcountSize = 0;
192                                 for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
193                                         map<string, int>::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]);
194                                         if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
195                                 }
196                                 
197                                 if (pcountSize == 0) { }
198                                 else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(t->tree[i].getBranchLength());   }
199                                         
200                                 if ((t->tree[i].getBranchLength() != -1) && (pcountSize != 0)) {  
201                                         totalBL += abs(t->tree[i].getBranchLength()); 
202                                 }
203                         }
204                 
205                         UW = (UniqueBL / totalBL);  
206         
207                         if (isnan(UW) || isinf(UW)) { UW = 0; }
208         
209                         results[count] = UW;
210                         count++;
211
212                         //report progress
213                         if((count) % twentyPercent == 0){       m->mothurOut("Percentage complete: " + toString(int((count / (float)total) * 100.0))); m->mothurOutEndLine();           }
214                 }
215                 
216                 m->mothurOut("Percentage complete: 100"); m->mothurOutEndLine();
217                 
218                 return results; 
219         }
220         catch(exception& e) {
221                 m->errorOut(e, "Unweighted", "driver");
222                 exit(1);
223         }
224 }
225 /**************************************************************************************************/
226
227 EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) { 
228  try {
229         globaldata = GlobalData::getInstance();
230                 
231                 vector<string> groups;
232                 double UniqueBL;  //a branch length is unique if it's chidren are from the same group
233                 double totalBL; //all branch lengths
234                 double UW;              //Unweighted Value = UniqueBL / totalBL;
235                 copyTree = new Tree;
236
237                 //if the users enters no groups then give them the score of all groups
238                 int numGroups = globaldata->Groups.size();
239
240                 //calculate number of comparsions
241                 int numComp = 0;
242                 for (int r=0; r<numGroups; r++) { 
243                         for (int l = r+1; l < numGroups; l++) {
244                                 numComp++;
245                         }
246                 }
247
248                 //numComp+1 for AB, AC, BC, ABC
249                 data.resize(numComp+1,0);
250                 
251                 int count = 0;
252                 for (int a=0; a<numGroups; a++) { 
253                         for (int l = a+1; l < numGroups; l++) {
254                                 UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
255                                 totalBL = 0.00; //all branch lengths
256                                 UW = 0.00;              //Unweighted Value = UniqueBL / totalBL;
257                                 
258                                 //copy random tree passed in
259                                 copyTree->getCopy(t);
260                                                                 
261                                 //groups in this combo
262                                 groups.push_back(globaldata->Groups[a]); groups.push_back(globaldata->Groups[l]);
263                                 
264                                 //swap labels in the groups you want to compare
265                                 copyTree->assembleRandomUnifracTree(groups[0], groups[1]);
266                                 
267                                 if (m->control_pressed) { delete copyTree; return data; }
268                                 
269                                 for(int i=0;i<copyTree->getNumNodes();i++){
270                         
271                                         if (m->control_pressed) {  return data; }
272                                         
273                                         //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
274                                         //pcountSize = 2, not unique to one group
275                                         //pcountSize = 1, unique to one group
276                                         
277                                         int pcountSize = 0;
278                                         for (int j = 0; j < groups.size(); j++) {
279                                                 map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(groups[j]);
280                                                 if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; } 
281                                         }
282                                         
283                                         if (pcountSize == 0) { }
284                                         else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(copyTree->tree[i].getBranchLength());     }
285                                                 
286                                         if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0)) {  
287                                                 totalBL += abs(copyTree->tree[i].getBranchLength()); 
288                                         }
289                                 }
290
291                                 
292                                 UW = (UniqueBL / totalBL);  
293         
294                                 if (isnan(UW) || isinf(UW)) { UW = 0; }
295         
296                                 data[count] = UW;
297                                 count++;
298                                 groups.clear();
299                         }
300                 }
301                 
302                 
303                 if (numComp != 1) {
304                         if (numGroups == 0) {
305                                 //get score for all users groups
306                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
307                                         if (tmap->namesOfGroups[i] != "xxx") {
308                                                 groups.push_back(tmap->namesOfGroups[i]);
309                                         }
310                                 }
311                         }else {
312                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
313                                         groups.push_back(globaldata->Groups[i]);
314                                 }
315                         }
316                 
317                         UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
318                         totalBL = 0.00; //all branch lengths
319                         UW = 0.00;              //Unweighted Value = UniqueBL / totalBL;
320                 
321                         //copy random tree passed in
322                         copyTree->getCopy(t);
323                                 
324                         //swap labels in all the groups you want to compare
325                         copyTree->assembleRandomUnifracTree(groups);
326                         
327                         if (m->control_pressed) { delete copyTree; return data; }
328
329                         for(int i=0;i<copyTree->getNumNodes();i++){
330                         
331                                 if (m->control_pressed) {  return data; }
332                                 
333                                 //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
334                                 //pcountSize = 2, not unique to one group
335                                 //pcountSize = 1, unique to one group
336                                 
337                                 int pcountSize = 0;
338                                 for (int j = 0; j < groups.size(); j++) {
339                                         map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(groups[j]);
340                                         if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
341                                 }
342                                 
343                                 if (pcountSize == 0) { }
344                                 else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(copyTree->tree[i].getBranchLength());     }
345                                         
346                                 if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0)) {  
347                                         totalBL += abs(copyTree->tree[i].getBranchLength()); 
348                                 }
349                         }
350                 
351                         UW = (UniqueBL / totalBL);  
352         
353                         if (isnan(UW) || isinf(UW)) { UW = 0; }
354         
355                         data[count] = UW;
356                 }
357                 
358                 delete copyTree;
359                 
360                 return data;
361         
362         }
363         catch(exception& e) {
364                 m->errorOut(e, "Unweighted", "getValues");
365                 exit(1);
366         }
367 }
368
369 /**************************************************************************************************/
370
371