]> git.donarmstrong.com Git - mothur.git/blob - unweighted.cpp
cluster.split fix
[mothur.git] / unweighted.cpp
1 /*
2  *  unweighted.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unweighted.h"
11
12 /**************************************************************************************************/
13
14 EstOutput Unweighted::getValues(Tree* t, int p, string o) {
15         try {
16                 globaldata = GlobalData::getInstance();
17                 processors = p;
18                 outputDir = o;
19                         
20                 //if the users enters no groups then give them the score of all groups
21                 int numGroups = globaldata->Groups.size();
22                 
23                 //calculate number of comparsions
24                 int numComp = 0;
25                 vector< vector<string> > namesOfGroupCombos;
26                 for (int r=0; r<numGroups; r++) { 
27                         for (int l = 0; l < r; l++) {
28                                 numComp++;
29                                 vector<string> groups; groups.push_back(globaldata->Groups[r]); groups.push_back(globaldata->Groups[l]);
30                                 namesOfGroupCombos.push_back(groups);
31                         }
32                 }
33                 
34                 if (numComp != 1) {
35                         vector<string> groups;
36                         if (numGroups == 0) {
37                                 //get score for all users groups
38                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
39                                         if (tmap->namesOfGroups[i] != "xxx") {
40                                                 groups.push_back(tmap->namesOfGroups[i]);
41                                         }
42                                 }
43                                 namesOfGroupCombos.push_back(groups);
44                         }else {
45                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
46                                         groups.push_back(globaldata->Groups[i]);
47                                 }
48                                 namesOfGroupCombos.push_back(groups);
49                         }
50                 }
51
52                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
53                         if(processors == 1){
54                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
55                         }else{
56                                 int numPairs = namesOfGroupCombos.size();
57                                 
58                                 int numPairsPerProcessor = numPairs / processors;
59                                 
60                                 for (int i = 0; i < processors; i++) {
61                                         int startPos = i * numPairsPerProcessor;
62                         
63                                         if(i == processors - 1){
64                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
65                                         }
66                 
67                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
68                                 }
69
70                                 data = createProcesses(t, namesOfGroupCombos);
71                                 lines.clear();
72                         }
73                 #else
74                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
75                 #endif
76                 
77                 return data;
78         }
79         catch(exception& e) {
80                 m->errorOut(e, "Unweighted", "getValues");
81                 exit(1);
82         }
83 }
84 /**************************************************************************************************/
85
86 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos) {
87         try {
88 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
89                 int process = 1;
90                 int num = 0;
91                 vector<int> processIDS;
92                 
93                 EstOutput results;
94                 
95                 //loop through and create all the processes you want
96                 while (process != processors) {
97                         int pid = fork();
98                         
99                         if (pid > 0) {
100                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
101                                 process++;
102                         }else if (pid == 0){
103                                 EstOutput myresults;
104                                 myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num);
105                                 
106                                 if (m->control_pressed) { exit(0); }
107                                 
108                                 m->mothurOut("Merging results."); m->mothurOutEndLine();
109                                 
110                                 //pass numSeqs to parent
111                                 ofstream out;
112                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
113                                 m->openOutputFile(tempFile, out);
114                                 out << myresults.size() << endl;
115                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
116                                 out.close();
117                                 
118                                 exit(0);
119                         }else { 
120                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
121                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
122                                 exit(0); 
123                         }
124                 }
125                 
126                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num);
127                 
128                 //force parent to wait until all the processes are done
129                 for (int i=0;i<(processors-1);i++) { 
130                         int temp = processIDS[i];
131                         wait(&temp);
132                 }
133                 
134                 if (m->control_pressed) { return results; }
135                 
136                 //get data created by processes
137                 for (int i=0;i<(processors-1);i++) { 
138                         ifstream in;
139                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
140                         m->openInputFile(s, in);
141                         
142                         //get quantiles
143                         if (!in.eof()) {
144                                 int num;
145                                 in >> num; m->gobble(in);
146                                 
147                                 if (m->control_pressed) { break; }
148                                 
149                                 double w; 
150                                 for (int j = 0; j < num; j++) {
151                                         in >> w;
152                                         results.push_back(w);
153                                 }
154                                 m->gobble(in);
155                         }
156                         in.close();
157                         remove(s.c_str());
158                 }
159                 
160                 m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine();
161                 
162                 return results;
163 #endif          
164         }
165         catch(exception& e) {
166                 m->errorOut(e, "Unweighted", "createProcesses");
167                 exit(1);
168         }
169 }
170 /**************************************************************************************************/
171 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num) { 
172  try {
173                 
174                 EstOutput results; results.resize(num);
175                 
176                 int count = 0;
177                 int numLeaves = t->getNumLeaves();
178                 int total = num;
179                 int twentyPercent = (total * 0.20);
180                 if (twentyPercent == 0) { twentyPercent = 1; }
181                 
182                 
183                 for (int h = start; h < (start+num); h++) {
184         //cout << namesOfGroupCombos[h][0] << '\t' << namesOfGroupCombos[h][1] << endl;         
185                         if (m->control_pressed) { return results; }
186                 
187                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
188                         double totalBL = 0.00;  //all branch lengths
189                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
190                         map<int, double> tempTotals; //maps node to total Branch Length
191                         map<int, int> nodePcountSize; //maps node to pcountSize
192                         map<int, int>::iterator itCount;
193                                 
194                         for(int i=0;i<t->getNumNodes();i++){
195                         
196                                 if (m->control_pressed) {  return data; }
197                                 
198                                 //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
199                                 //pcountSize = 2, not unique to one group
200                                 //pcountSize = 1, unique to one group
201                                 
202                                 int pcountSize = 0;
203                                 for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
204                                         map<string, int>::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]);
205                                         if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
206                                 }
207
208                                 nodePcountSize[i] = pcountSize;
209                                 
210                                 //cout << i << '\t' << t->tree[i].getName() << " br = " << abs(t->tree[i].getBranchLength()) << '\t';           
211                                 if (pcountSize == 0) { }
212                                 else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(t->tree[i].getBranchLength()); }
213                                 
214                                 //if you are a leaf from a users group add to total
215                                 if (i < numLeaves) {
216                                         if ((t->tree[i].getBranchLength() != -1) && pcountSize != 0) { 
217                                         //cout << "added to total" << endl; 
218                                                 totalBL += abs(t->tree[i].getBranchLength()); 
219                                         }
220                                         tempTotals[i] = 0.0;  //we don't care about you, or we have already added you
221                                 }else{ //if you are not a leaf 
222                                         //do both your chidren have have descendants from the users groups? 
223                                         int lc = t->tree[i].getLChild();
224                                         int rc = t->tree[i].getRChild();
225                                         
226                                         //if yes, add your childrens tempTotals
227                                         if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) {
228                                                 totalBL += tempTotals[lc] + tempTotals[rc]; 
229                                                 //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl;
230                                                 if (t->tree[i].getBranchLength() != -1) {
231                                                         tempTotals[i] = abs(t->tree[i].getBranchLength());
232                                                 }else {
233                                                         tempTotals[i] = 0.0;
234                                                 }
235                                         }else if ((nodePcountSize[lc] == 0) && (nodePcountSize[rc] == 0)) { tempTotals[i] = 0.0;  //we don't care about you
236                                         }else { //if no, your tempTotal is your childrens temp totals + your branch length
237                                                 tempTotals[i] = tempTotals[lc] + tempTotals[rc] + abs(t->tree[i].getBranchLength()); 
238                                         }
239                                         //cout << "temptotal = "<< tempTotals[i] << endl;
240                                 }
241
242                         }
243         //cout << UniqueBL << '\t' << totalBL << endl;          
244                         UW = (UniqueBL / totalBL);  
245         
246                         if (isnan(UW) || isinf(UW)) { UW = 0; }
247         
248                         results[count] = UW;
249                         count++;
250
251                         //report progress
252                         if((count % twentyPercent) == 0) {      float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine();       }
253                 }
254                 
255                 //report progress
256                 if((count % twentyPercent) != 0) {      float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine();       }
257                 
258                 return results; 
259         }
260         catch(exception& e) {
261                 m->errorOut(e, "Unweighted", "driver");
262                 exit(1);
263         }
264 }
265 /**************************************************************************************************/
266
267 EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, string o) { 
268  try {
269                 globaldata = GlobalData::getInstance();
270                 processors = p;
271                 outputDir = o;
272                 
273                 
274                 //if the users enters no groups then give them the score of all groups
275                 int numGroups = globaldata->Groups.size();
276                 
277                 //calculate number of comparsions
278                 int numComp = 0;
279                 vector< vector<string> > namesOfGroupCombos;
280                 for (int r=0; r<numGroups; r++) { 
281                         for (int l = 0; l < r; l++) {
282                                 numComp++;
283                                 vector<string> groups; groups.push_back(globaldata->Groups[r]); groups.push_back(globaldata->Groups[l]);
284                                 namesOfGroupCombos.push_back(groups);
285                         }
286                 }
287                 
288                 if (numComp != 1) {
289                         vector<string> groups;
290                         if (numGroups == 0) {
291                                 //get score for all users groups
292                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
293                                         if (tmap->namesOfGroups[i] != "xxx") {
294                                                 groups.push_back(tmap->namesOfGroups[i]);
295                                         }
296                                 }
297                                 namesOfGroupCombos.push_back(groups);
298                         }else {
299                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
300                                         groups.push_back(globaldata->Groups[i]);
301                                 }
302                                 namesOfGroupCombos.push_back(groups);
303                         }
304                 }
305
306                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
307                         if(processors == 1){
308                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true);
309                         }else{
310                                 int numPairs = namesOfGroupCombos.size();
311                                 
312                                 int numPairsPerProcessor = numPairs / processors;
313                                 
314                                 for (int i = 0; i < processors; i++) {
315                                         int startPos = i * numPairsPerProcessor;
316                                         if(i == processors - 1){
317                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
318                                         }
319                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
320                                 }
321
322                                 data = createProcesses(t, namesOfGroupCombos, true);
323                                 
324                                 lines.clear();
325                         }
326                 #else
327                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true);
328                 #endif
329         
330                 return data;
331         }
332         catch(exception& e) {
333                 m->errorOut(e, "Unweighted", "getValues");
334                 exit(1);
335         }
336 }
337 /**************************************************************************************************/
338
339 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, bool usingGroups) {
340         try {
341 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
342                 int process = 1;
343                 int num = 0;
344                 vector<int> processIDS;
345                 
346                 EstOutput results;
347                 
348                 //loop through and create all the processes you want
349                 while (process != processors) {
350                         int pid = fork();
351                         
352                         if (pid > 0) {
353                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
354                                 process++;
355                         }else if (pid == 0){
356                                 EstOutput myresults;
357                                 myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, usingGroups);
358                                 
359                                 if (m->control_pressed) { exit(0); }
360                                 
361                                 //pass numSeqs to parent
362                                 ofstream out;
363                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
364                                 m->openOutputFile(tempFile, out);
365                                 out << myresults.size() << endl;
366                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
367                                 out.close();
368                                 
369                                 exit(0);
370                         }else { 
371                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
372                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
373                                 exit(0); 
374                         }
375                 }
376                 
377                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups);
378                 
379                 //force parent to wait until all the processes are done
380                 for (int i=0;i<(processors-1);i++) { 
381                         int temp = processIDS[i];
382                         wait(&temp);
383                 }
384                 
385                 if (m->control_pressed) { return results; }
386                 
387                 //get data created by processes
388                 for (int i=0;i<(processors-1);i++) { 
389                         ifstream in;
390                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
391                         m->openInputFile(s, in);
392                         
393                         //get quantiles
394                         if (!in.eof()) {
395                                 int num;
396                                 in >> num; m->gobble(in);
397                                 
398                                 if (m->control_pressed) { break; }
399                                 
400                                 double w; 
401                                 for (int j = 0; j < num; j++) {
402                                         in >> w;
403                                         results.push_back(w);
404                                 }
405                                 m->gobble(in);
406                         }
407                         in.close();
408                         remove(s.c_str());
409                 }
410                 
411                 return results;
412 #endif          
413         }
414         catch(exception& e) {
415                 m->errorOut(e, "Unweighted", "createProcesses");
416                 exit(1);
417         }
418 }
419 /**************************************************************************************************/
420 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, bool usingGroups) { 
421  try {
422                 
423                 EstOutput results; results.resize(num);
424                 
425                 int count = 0;
426                 int total = num;
427                 int twentyPercent = (total * 0.20);
428                 int numLeaves = t->getNumLeaves();
429                 
430                 Tree* copyTree = new Tree;
431                 
432                 for (int h = start; h < (start+num); h++) {
433                 
434                         if (m->control_pressed) { return results; }
435                 
436                         //copy random tree passed in
437                         copyTree->getCopy(t);
438                                 
439                         //swap labels in the groups you want to compare
440                         copyTree->assembleRandomUnifracTree(namesOfGroupCombos[h]);
441                         
442                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
443                         double totalBL = 0.00;  //all branch lengths
444                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
445                         map<int, double> tempTotals; //maps node to total Branch Length
446                         map<int, int> nodePcountSize; //maps node to pcountSize
447                                 
448                         for(int i=0;i<copyTree->getNumNodes();i++){
449                         
450                                 if (m->control_pressed) {  return data; }
451                                 
452                                 //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
453                                 //pcountSize = 2, not unique to one group
454                                 //pcountSize = 1, unique to one group
455                                 
456                                 int pcountSize = 0;
457                                 for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
458                                         map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(namesOfGroupCombos[h][j]);
459                                         if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
460                                 }
461                                 
462                                 nodePcountSize[i] = pcountSize;
463                         
464                                 if (pcountSize == 0) { }
465                                 else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(copyTree->tree[i].getBranchLength());      }
466                                 
467                                 //if you are a leaf from a users group add to total
468                                 if (i < numLeaves) {
469                                         if ((copyTree->tree[i].getBranchLength() != -1) && pcountSize != 0) { 
470                                                 totalBL += abs(copyTree->tree[i].getBranchLength()); 
471                                         }
472                                         tempTotals[i] = 0.0;  //we don't care about you, or we have already added you
473                                 }else{ //if you are not a leaf 
474                                         //do both your chidren have have descendants from the users groups? 
475                                         int lc = copyTree->tree[i].getLChild();
476                                         int rc = copyTree->tree[i].getRChild();
477                                         
478                                         //if yes, add your childrens tempTotals
479                                         if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) {
480                                                 totalBL += tempTotals[lc] + tempTotals[rc]; 
481                                                 
482                                                 if (copyTree->tree[i].getBranchLength() != -1) {
483                                                         tempTotals[i] = abs(copyTree->tree[i].getBranchLength());
484                                                 }else {
485                                                         tempTotals[i] = 0.0;
486                                                 }
487                                         }else if ((nodePcountSize[lc] == 0) && (nodePcountSize[rc] == 0)) { tempTotals[i] = 0.0;  //we don't care about you
488                                         }else { //if no, your tempTotal is your childrens temp totals + your branch length
489                                                 tempTotals[i] = tempTotals[lc] + tempTotals[rc] + abs(copyTree->tree[i].getBranchLength()); 
490                                         }
491                                         
492                                 }
493
494                         }
495                 
496                         UW = (UniqueBL / totalBL);  
497         
498                         if (isnan(UW) || isinf(UW)) { UW = 0; }
499         
500                         results[count] = UW;
501                         count++;
502
503                 }
504                 
505                 delete copyTree;
506                 
507                 return results; 
508         }
509         catch(exception& e) {
510                 m->errorOut(e, "Unweighted", "driver");
511                 exit(1);
512         }
513 }
514 /**************************************************************************************************/
515
516