]> git.donarmstrong.com Git - mothur.git/blob - unweighted.cpp
unifrac parallelization done, changed dist.seqs output=square to calc all distance...
[mothur.git] / unweighted.cpp
1 /*
2  *  unweighted.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unweighted.h"
11
12 /**************************************************************************************************/
13
14 EstOutput Unweighted::getValues(Tree* t, int p, string o) {
15         try {
16                 globaldata = GlobalData::getInstance();
17                 processors = p;
18                 outputDir = o;
19                         
20                 //if the users enters no groups then give them the score of all groups
21                 int numGroups = globaldata->Groups.size();
22                 
23                 //calculate number of comparsions
24                 int numComp = 0;
25                 vector< vector<string> > namesOfGroupCombos;
26                 for (int r=0; r<numGroups; r++) { 
27                         for (int l = r+1; l < numGroups; l++) {
28                                 numComp++;
29                                 vector<string> groups; groups.push_back(globaldata->Groups[r]); groups.push_back(globaldata->Groups[l]);
30                                 namesOfGroupCombos.push_back(groups);
31                         }
32                 }
33                 
34                 if (numComp != 1) {
35                         vector<string> groups;
36                         if (numGroups == 0) {
37                                 //get score for all users groups
38                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
39                                         if (tmap->namesOfGroups[i] != "xxx") {
40                                                 groups.push_back(tmap->namesOfGroups[i]);
41                                         }
42                                 }
43                                 namesOfGroupCombos.push_back(groups);
44                         }else {
45                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
46                                         groups.push_back(globaldata->Groups[i]);
47                                 }
48                                 namesOfGroupCombos.push_back(groups);
49                         }
50                 }
51
52                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
53                         if(processors == 1){
54                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
55                         }else{
56                                 int numPairs = namesOfGroupCombos.size();
57                                 
58                                 int numPairsPerProcessor = numPairs / processors;
59                                 
60                                 for (int i = 0; i < processors; i++) {
61                                         int startPos = i * numPairsPerProcessor;
62                         
63                                         if(i == processors - 1){
64                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
65                                         }
66                 
67                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
68                                 }
69
70                                 data = createProcesses(t, namesOfGroupCombos);
71                                 lines.clear();
72                         }
73                 #else
74                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
75                 #endif
76                 
77                 return data;
78         }
79         catch(exception& e) {
80                 m->errorOut(e, "Unweighted", "getValues");
81                 exit(1);
82         }
83 }
84 /**************************************************************************************************/
85
86 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos) {
87         try {
88 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
89                 int process = 1;
90                 int num = 0;
91                 vector<int> processIDS;
92                 
93                 EstOutput results;
94                 
95                 //loop through and create all the processes you want
96                 while (process != processors) {
97                         int pid = fork();
98                         
99                         if (pid > 0) {
100                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
101                                 process++;
102                         }else if (pid == 0){
103                                 EstOutput myresults;
104                                 myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num);
105                                 
106                                 if (m->control_pressed) { exit(0); }
107                                 
108                                 m->mothurOut("Merging results."); m->mothurOutEndLine();
109                                 
110                                 //pass numSeqs to parent
111                                 ofstream out;
112                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
113                                 m->openOutputFile(tempFile, out);
114                                 out << myresults.size() << endl;
115                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
116                                 out.close();
117                                 
118                                 exit(0);
119                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
120                 }
121                 
122                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num);
123                 
124                 //force parent to wait until all the processes are done
125                 for (int i=0;i<(processors-1);i++) { 
126                         int temp = processIDS[i];
127                         wait(&temp);
128                 }
129                 
130                 if (m->control_pressed) { return results; }
131                 
132                 //get data created by processes
133                 for (int i=0;i<(processors-1);i++) { 
134                         ifstream in;
135                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
136                         m->openInputFile(s, in);
137                         
138                         //get quantiles
139                         if (!in.eof()) {
140                                 int num;
141                                 in >> num; m->gobble(in);
142                                 
143                                 if (m->control_pressed) { break; }
144                                 
145                                 double w; 
146                                 for (int j = 0; j < num; j++) {
147                                         in >> w;
148                                         results.push_back(w);
149                                 }
150                                 m->gobble(in);
151                         }
152                         in.close();
153                         remove(s.c_str());
154                 }
155                 
156                 m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine();
157                 
158                 return results;
159 #endif          
160         }
161         catch(exception& e) {
162                 m->errorOut(e, "Unweighted", "createProcesses");
163                 exit(1);
164         }
165 }
166 /**************************************************************************************************/
167 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num) { 
168  try {
169                 
170                 EstOutput results; results.resize(num);
171                 
172                 int count = 0;
173                 int total = num;
174                 int twentyPercent = (total * 0.20);
175                 if (twentyPercent == 0) { twentyPercent = 1; }
176                 
177                 for (int h = start; h < (start+num); h++) {
178                 
179                         if (m->control_pressed) { return results; }
180                 
181                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
182                         double totalBL = 0.00;  //all branch lengths
183                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
184                                 
185                         for(int i=0;i<t->getNumNodes();i++){
186                         
187                                 if (m->control_pressed) {  return data; }
188                                 
189                                 //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
190                                 //pcountSize = 2, not unique to one group
191                                 //pcountSize = 1, unique to one group
192                                 
193                                 int pcountSize = 0;
194                                 for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
195                                         map<string, int>::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]);
196                                         if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
197                                 }
198                                 
199                                 if (pcountSize == 0) { }
200                                 else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(t->tree[i].getBranchLength());   }
201                                         
202                                 if ((t->tree[i].getBranchLength() != -1) && (pcountSize != 0)) {  
203                                         totalBL += abs(t->tree[i].getBranchLength()); 
204                                 }
205                         }
206         
207                         UW = (UniqueBL / totalBL);  
208         
209                         if (isnan(UW) || isinf(UW)) { UW = 0; }
210         
211                         results[count] = UW;
212                         count++;
213
214                         //report progress
215                         if((count % twentyPercent) == 0) {      float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine();       }
216                 }
217                 
218                 //report progress
219                 if((count % twentyPercent) != 0) {      float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine();       }
220                 
221                 return results; 
222         }
223         catch(exception& e) {
224                 m->errorOut(e, "Unweighted", "driver");
225                 exit(1);
226         }
227 }
228 /**************************************************************************************************/
229
230 EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, string o) { 
231  try {
232                 globaldata = GlobalData::getInstance();
233                 processors = p;
234                 outputDir = o;
235                 
236                 
237                 //if the users enters no groups then give them the score of all groups
238                 int numGroups = globaldata->Groups.size();
239                 
240                 //calculate number of comparsions
241                 int numComp = 0;
242                 vector< vector<string> > namesOfGroupCombos;
243                 for (int r=0; r<numGroups; r++) { 
244                         for (int l = r+1; l < numGroups; l++) {
245                                 numComp++;
246                                 vector<string> groups; groups.push_back(globaldata->Groups[r]); groups.push_back(globaldata->Groups[l]);
247                                 namesOfGroupCombos.push_back(groups);
248                         }
249                 }
250                 
251                 if (numComp != 1) {
252                         vector<string> groups;
253                         if (numGroups == 0) {
254                                 //get score for all users groups
255                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
256                                         if (tmap->namesOfGroups[i] != "xxx") {
257                                                 groups.push_back(tmap->namesOfGroups[i]);
258                                         }
259                                 }
260                                 namesOfGroupCombos.push_back(groups);
261                         }else {
262                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
263                                         groups.push_back(globaldata->Groups[i]);
264                                 }
265                                 namesOfGroupCombos.push_back(groups);
266                         }
267                 }
268
269                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
270                         if(processors == 1){
271                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true);
272                         }else{
273                                 int numPairs = namesOfGroupCombos.size();
274                                 
275                                 int numPairsPerProcessor = numPairs / processors;
276                                 
277                                 for (int i = 0; i < processors; i++) {
278                                         int startPos = i * numPairsPerProcessor;
279                                         if(i == processors - 1){
280                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
281                                         }
282                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
283                                 }
284
285                                 data = createProcesses(t, namesOfGroupCombos, true);
286                                 
287                                 lines.clear();
288                         }
289                 #else
290                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true);
291                 #endif
292         
293                 return data;
294         }
295         catch(exception& e) {
296                 m->errorOut(e, "Unweighted", "getValues");
297                 exit(1);
298         }
299 }
300 /**************************************************************************************************/
301
302 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, bool usingGroups) {
303         try {
304 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
305                 int process = 1;
306                 int num = 0;
307                 vector<int> processIDS;
308                 
309                 EstOutput results;
310                 
311                 //loop through and create all the processes you want
312                 while (process != processors) {
313                         int pid = fork();
314                         
315                         if (pid > 0) {
316                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
317                                 process++;
318                         }else if (pid == 0){
319                                 EstOutput myresults;
320                                 myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, usingGroups);
321                                 
322                                 if (m->control_pressed) { exit(0); }
323                                 
324                                 //pass numSeqs to parent
325                                 ofstream out;
326                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
327                                 m->openOutputFile(tempFile, out);
328                                 out << myresults.size() << endl;
329                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
330                                 out.close();
331                                 
332                                 exit(0);
333                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
334                 }
335                 
336                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups);
337                 
338                 //force parent to wait until all the processes are done
339                 for (int i=0;i<(processors-1);i++) { 
340                         int temp = processIDS[i];
341                         wait(&temp);
342                 }
343                 
344                 if (m->control_pressed) { return results; }
345                 
346                 //get data created by processes
347                 for (int i=0;i<(processors-1);i++) { 
348                         ifstream in;
349                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
350                         m->openInputFile(s, in);
351                         
352                         //get quantiles
353                         if (!in.eof()) {
354                                 int num;
355                                 in >> num; m->gobble(in);
356                                 
357                                 if (m->control_pressed) { break; }
358                                 
359                                 double w; 
360                                 for (int j = 0; j < num; j++) {
361                                         in >> w;
362                                         results.push_back(w);
363                                 }
364                                 m->gobble(in);
365                         }
366                         in.close();
367                         remove(s.c_str());
368                 }
369                 
370                 return results;
371 #endif          
372         }
373         catch(exception& e) {
374                 m->errorOut(e, "Unweighted", "createProcesses");
375                 exit(1);
376         }
377 }
378 /**************************************************************************************************/
379 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, bool usingGroups) { 
380  try {
381                 
382                 EstOutput results; results.resize(num);
383                 
384                 int count = 0;
385                 int total = num;
386                 int twentyPercent = (total * 0.20);
387                 
388                 Tree* copyTree = new Tree;
389                 
390                 for (int h = start; h < (start+num); h++) {
391                 
392                         if (m->control_pressed) { return results; }
393                 
394                         //copy random tree passed in
395                         copyTree->getCopy(t);
396                                 
397                         //swap labels in the groups you want to compare
398                         copyTree->assembleRandomUnifracTree(namesOfGroupCombos[h]);
399                         
400                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
401                         double totalBL = 0.00;  //all branch lengths
402                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
403                                 
404                         for(int i=0;i<t->getNumNodes();i++){
405                         
406                                 if (m->control_pressed) {  return data; }
407                                 
408                                 //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
409                                 //pcountSize = 2, not unique to one group
410                                 //pcountSize = 1, unique to one group
411                                 
412                                 int pcountSize = 0;
413                                 for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
414                                         map<string, int>::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]);
415                                         if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
416                                 }
417                                 
418                                 if (pcountSize == 0) { }
419                                 else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(t->tree[i].getBranchLength());   }
420                                         
421                                 if ((t->tree[i].getBranchLength() != -1) && (pcountSize != 0)) {  
422                                         totalBL += abs(t->tree[i].getBranchLength()); 
423                                 }
424                         }
425                 
426                         UW = (UniqueBL / totalBL);  
427         
428                         if (isnan(UW) || isinf(UW)) { UW = 0; }
429         
430                         results[count] = UW;
431                         count++;
432
433                 }
434                 
435                 delete copyTree;
436                 
437                 return results; 
438         }
439         catch(exception& e) {
440                 m->errorOut(e, "Unweighted", "driver");
441                 exit(1);
442         }
443 }
444 /**************************************************************************************************/
445
446