]> git.donarmstrong.com Git - mothur.git/blob - unweighted.cpp
added root parameter to the unifrac commands so you can choose to include the entire...
[mothur.git] / unweighted.cpp
1 /*
2  *  unweighted.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unweighted.h"
11
12 /**************************************************************************************************/
13
14 EstOutput Unweighted::getValues(Tree* t, int p, string o) {
15         try {
16                 globaldata = GlobalData::getInstance();
17                 processors = p;
18                 outputDir = o;
19                         
20                 //if the users enters no groups then give them the score of all groups
21                 int numGroups = globaldata->Groups.size();
22                 
23                 //calculate number of comparsions
24                 int numComp = 0;
25                 vector< vector<string> > namesOfGroupCombos;
26                 for (int r=0; r<numGroups; r++) { 
27                         for (int l = 0; l < r; l++) {
28                                 numComp++;
29                                 vector<string> groups; groups.push_back(globaldata->Groups[r]); groups.push_back(globaldata->Groups[l]);
30                                 namesOfGroupCombos.push_back(groups);
31                         }
32                 }
33                 
34                 if (numComp != 1) {
35                         vector<string> groups;
36                         if (numGroups == 0) {
37                                 //get score for all users groups
38                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
39                                         if (tmap->namesOfGroups[i] != "xxx") {
40                                                 groups.push_back(tmap->namesOfGroups[i]);
41                                         }
42                                 }
43                                 namesOfGroupCombos.push_back(groups);
44                         }else {
45                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
46                                         groups.push_back(globaldata->Groups[i]);
47                                 }
48                                 namesOfGroupCombos.push_back(groups);
49                         }
50                 }
51
52                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
53                         if(processors == 1){
54                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
55                         }else{
56                                 int numPairs = namesOfGroupCombos.size();
57                                 
58                                 int numPairsPerProcessor = numPairs / processors;
59                                 
60                                 for (int i = 0; i < processors; i++) {
61                                         int startPos = i * numPairsPerProcessor;
62                         
63                                         if(i == processors - 1){
64                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
65                                         }
66                 
67                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
68                                 }
69                                 data = createProcesses(t, namesOfGroupCombos);
70                                 lines.clear();
71                         }
72                 #else
73                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
74                 #endif
75                 
76                 return data;
77         }
78         catch(exception& e) {
79                 m->errorOut(e, "Unweighted", "getValues");
80                 exit(1);
81         }
82 }
83 /**************************************************************************************************/
84
85 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos) {
86         try {
87 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
88                 int process = 1;
89                 vector<int> processIDS;
90                 
91                 EstOutput results;
92                 
93                 //loop through and create all the processes you want
94                 while (process != processors) {
95                         int pid = fork();
96                         
97                         if (pid > 0) {
98                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
99                                 process++;
100                         }else if (pid == 0){
101                                 EstOutput myresults;
102                                 myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num);
103                                 
104                                 if (m->control_pressed) { exit(0); }
105                                 
106                                 m->mothurOut("Merging results."); m->mothurOutEndLine();
107                                 
108                                 //pass numSeqs to parent
109                                 ofstream out;
110                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
111                                 m->openOutputFile(tempFile, out);
112                                 out << myresults.size() << endl;
113                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
114                                 out.close();
115                                 
116                                 exit(0);
117                         }else { 
118                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
119                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
120                                 exit(0); 
121                         }
122                 }
123                 
124                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num);
125                 
126                 //force parent to wait until all the processes are done
127                 for (int i=0;i<(processors-1);i++) { 
128                         int temp = processIDS[i];
129                         wait(&temp);
130                 }
131                 
132                 if (m->control_pressed) { return results; }
133                 
134                 //get data created by processes
135                 for (int i=0;i<(processors-1);i++) { 
136                         ifstream in;
137                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
138                         m->openInputFile(s, in);
139                         
140                         //get quantiles
141                         if (!in.eof()) {
142                                 int num;
143                                 in >> num; m->gobble(in);
144                                 
145                                 if (m->control_pressed) { break; }
146                                 
147                                 double w; 
148                                 for (int j = 0; j < num; j++) {
149                                         in >> w;
150                                         results.push_back(w);
151                                 }
152                                 m->gobble(in);
153                         }
154                         in.close();
155                         remove(s.c_str());
156                 }
157                 
158                 m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine();
159                 
160                 return results;
161 #endif          
162         }
163         catch(exception& e) {
164                 m->errorOut(e, "Unweighted", "createProcesses");
165                 exit(1);
166         }
167 }
168 /**************************************************************************************************/
169 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num) { 
170  try {
171         
172          
173                 EstOutput results; results.resize(num);
174                 
175                 int count = 0;
176                 int total = num;
177                 int twentyPercent = (total * 0.20);
178                 if (twentyPercent == 0) { twentyPercent = 1; }
179          
180                         
181                 for (int h = start; h < (start+num); h++) {
182                                 
183                         if (m->control_pressed) { return results; }
184                 
185                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
186                         double totalBL = 0.00;  //all branch lengths
187                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
188                                                 
189                         //find a node that belongs to one of the groups in this combo
190                         int nodeBelonging = -1;
191                         for (int g = 0; g < namesOfGroupCombos[h].size(); g++) {
192                                 if (t->groupNodeInfo[namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = t->groupNodeInfo[namesOfGroupCombos[h][g]][0]; break; }
193                         }
194                         
195                         //sanity check
196                         if (nodeBelonging == -1) {
197                                 m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping "); 
198                                 for (int g = 0; g < namesOfGroupCombos[h].size()-1; g++) { m->mothurOut(namesOfGroupCombos[h][g] + "-"); }
199                                 m->mothurOut(namesOfGroupCombos[h][namesOfGroupCombos[h].size()-1]);
200                                 m->mothurOut(", skipping."); m->mothurOutEndLine(); results[count] = UW;
201                         }else{
202                                 
203                                 //if including the root this clears rootForGrouping[namesOfGroupCombos[h]]
204                                 getRoot(t, nodeBelonging, namesOfGroupCombos[h]);
205                                 
206                                 for(int i=0;i<t->getNumNodes();i++){
207                                         
208                                         if (m->control_pressed) {  return data; }
209                                         
210                                         //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
211                                         //pcountSize = 2, not unique to one group
212                                         //pcountSize = 1, unique to one group
213                                         
214                                         int pcountSize = 0;
215                                         for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
216                                                 map<string, int>::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]);
217                                                 if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
218                                         }
219                                         
220                                         
221                                         //unique calc
222                                         if (pcountSize == 0) { }
223                                         else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root 
224                                                 UniqueBL += abs(t->tree[i].getBranchLength()); 
225                                         }
226                                                 
227                                         //total calc
228                                         if (pcountSize == 0) { }
229                                         else if ((t->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root 
230                                                 totalBL += abs(t->tree[i].getBranchLength()); 
231                                         }
232                                 }
233         //cout << UniqueBL << '\t' << totalBL << endl;          
234                                 UW = (UniqueBL / totalBL);  
235         
236                                 if (isnan(UW) || isinf(UW)) { UW = 0; }
237         
238                                 results[count] = UW;
239                         }
240                         count++;
241
242                         //report progress
243                         //if((count % twentyPercent) == 0) {    float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine();       }
244                 }
245                 
246                 //report progress
247                 //if((count % twentyPercent) != 0) {    float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine();       }
248                 
249                 return results; 
250         }
251         catch(exception& e) {
252                 m->errorOut(e, "Unweighted", "driver");
253                 exit(1);
254         }
255 }
256 /**************************************************************************************************/
257
258 EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, string o) { 
259  try {
260                 globaldata = GlobalData::getInstance();
261                 processors = p;
262                 outputDir = o;
263                 
264                 //if the users enters no groups then give them the score of all groups
265                 int numGroups = globaldata->Groups.size();
266                 
267                 //calculate number of comparsions
268                 int numComp = 0;
269                 vector< vector<string> > namesOfGroupCombos;
270                 for (int r=0; r<numGroups; r++) { 
271                         for (int l = 0; l < r; l++) {
272                                 numComp++;
273                                 vector<string> groups; groups.push_back(globaldata->Groups[r]); groups.push_back(globaldata->Groups[l]);
274                                 namesOfGroupCombos.push_back(groups);
275                         }
276                 }
277                 
278                 if (numComp != 1) {
279                         vector<string> groups;
280                         if (numGroups == 0) {
281                                 //get score for all users groups
282                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
283                                         if (tmap->namesOfGroups[i] != "xxx") {
284                                                 groups.push_back(tmap->namesOfGroups[i]);
285                                         }
286                                 }
287                                 namesOfGroupCombos.push_back(groups);
288                         }else {
289                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
290                                         groups.push_back(globaldata->Groups[i]);
291                                 }
292                                 namesOfGroupCombos.push_back(groups);
293                         }
294                 }
295
296                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
297                         if(processors == 1){
298                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true);
299                         }else{
300                                 int numPairs = namesOfGroupCombos.size();
301                                 
302                                 int numPairsPerProcessor = numPairs / processors;
303                                 
304                                 for (int i = 0; i < processors; i++) {
305                                         int startPos = i * numPairsPerProcessor;
306                                         if(i == processors - 1){
307                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
308                                         }
309                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
310                                 }
311                                         
312                                 data = createProcesses(t, namesOfGroupCombos, true);
313                                 
314                                 lines.clear();
315                         }
316                 #else
317                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true);
318                 #endif
319         
320                 return data;
321         }
322         catch(exception& e) {
323                 m->errorOut(e, "Unweighted", "getValues");
324                 exit(1);
325         }
326 }
327 /**************************************************************************************************/
328
329 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, bool usingGroups) {
330         try {
331 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
332                 int process = 1;
333                 vector<int> processIDS;
334                 
335                 EstOutput results;
336                 
337                 //loop through and create all the processes you want
338                 while (process != processors) {
339                         int pid = fork();
340                         
341                         if (pid > 0) {
342                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
343                                 process++;
344                         }else if (pid == 0){
345                                 EstOutput myresults;
346                                 myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, usingGroups);
347                                 
348                                 if (m->control_pressed) { exit(0); }
349                                 
350                                 //pass numSeqs to parent
351                                 ofstream out;
352                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
353                                 m->openOutputFile(tempFile, out);
354                                 out << myresults.size() << endl;
355                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
356                                 out.close();
357                                 
358                                 exit(0);
359                         }else { 
360                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
361                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
362                                 exit(0); 
363                         }
364                 }
365                 
366                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups);
367                 
368                 //force parent to wait until all the processes are done
369                 for (int i=0;i<(processors-1);i++) { 
370                         int temp = processIDS[i];
371                         wait(&temp);
372                 }
373                 
374                 if (m->control_pressed) { return results; }
375                 
376                 //get data created by processes
377                 for (int i=0;i<(processors-1);i++) { 
378                         ifstream in;
379                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
380                         m->openInputFile(s, in);
381                         
382                         //get quantiles
383                         if (!in.eof()) {
384                                 int num;
385                                 in >> num; m->gobble(in);
386                                         
387                                 if (m->control_pressed) { break; }
388                                 
389                                 double w; 
390                                 for (int j = 0; j < num; j++) {
391                                         in >> w;
392                                         
393                                         results.push_back(w);
394                                 }
395                                 m->gobble(in);
396                         }
397                         in.close();
398                         remove(s.c_str());
399                 }
400                 
401                 return results;
402 #endif          
403         }
404         catch(exception& e) {
405                 m->errorOut(e, "Unweighted", "createProcesses");
406                 exit(1);
407         }
408 }
409 /**************************************************************************************************/
410 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, bool usingGroups) { 
411  try {
412                 
413                 EstOutput results; results.resize(num);
414                 
415                 int count = 0;
416                 
417                 Tree* copyTree = new Tree;
418                 
419                 for (int h = start; h < (start+num); h++) {
420                 
421                         if (m->control_pressed) { return results; }
422                 
423                         //copy random tree passed in
424                         copyTree->getCopy(t);
425                                 
426                         //swap labels in the groups you want to compare
427                         copyTree->assembleRandomUnifracTree(namesOfGroupCombos[h]);
428                         
429                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
430                         double totalBL = 0.00;  //all branch lengths
431                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
432                         //find a node that belongs to one of the groups in this combo
433                         int nodeBelonging = -1;
434                         for (int g = 0; g < namesOfGroupCombos[h].size(); g++) {
435                                 if (copyTree->groupNodeInfo[namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = copyTree->groupNodeInfo[namesOfGroupCombos[h][g]][0]; break; }
436                         }
437                         
438                         //sanity check
439                         if (nodeBelonging == -1) {
440                                 m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping "); 
441                                 for (int g = 0; g < namesOfGroupCombos[h].size()-1; g++) { m->mothurOut(namesOfGroupCombos[h][g] + "-"); }
442                                 m->mothurOut(namesOfGroupCombos[h][namesOfGroupCombos[h].size()-1]);
443                                 m->mothurOut(", skipping."); m->mothurOutEndLine(); results[count] = UW;
444                         }else{
445                                 
446                                 //if including the root this clears rootForGrouping[namesOfGroupCombos[h]]
447                                 getRoot(copyTree, nodeBelonging, namesOfGroupCombos[h]);
448                                 
449                                 for(int i=0;i<copyTree->getNumNodes();i++){
450                                         
451                                         if (m->control_pressed) {  return data; }
452                                         
453                                         //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
454                                         //pcountSize = 2, not unique to one group
455                                         //pcountSize = 1, unique to one group
456                                         
457                                         int pcountSize = 0;
458                                         for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
459                                                 map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(namesOfGroupCombos[h][j]);
460                                                 if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
461                                         }
462                                         
463                                         //unique calc
464                                         if (pcountSize == 0) { }
465                                         else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root 
466                                                 UniqueBL += abs(copyTree->tree[i].getBranchLength()); 
467                                         }
468                                         
469                                         //total calc
470                                         if (pcountSize == 0) { }
471                                         else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root 
472                                                 totalBL += abs(copyTree->tree[i].getBranchLength()); 
473                                         }
474                                         
475                                 }
476                                 //cout << UniqueBL << '\t' << totalBL << endl;          
477                                 UW = (UniqueBL / totalBL);  
478                                 
479                                 if (isnan(UW) || isinf(UW)) { UW = 0; }
480                                 
481                                 results[count] = UW;
482                         }
483                         count++;
484                         
485                 }
486                 
487                 delete copyTree;
488                 
489                 return results; 
490         }
491         catch(exception& e) {
492                 m->errorOut(e, "Unweighted", "driver");
493                 exit(1);
494         }
495 }
496 /**************************************************************************************************/
497 int Unweighted::getRoot(Tree* t, int v, vector<string> grouping) { 
498         try {
499                 //you are a leaf so get your parent
500                 int index = t->tree[index].getParent();
501                 
502                 if (includeRoot) { 
503                         rootForGrouping[grouping].clear();
504                 }else {
505                         //my parent is a potential root
506                         rootForGrouping[grouping].insert(index);
507                         
508                         //while you aren't at root
509                         while(t->tree[index].getParent() != -1){
510                                 
511                                 if (m->control_pressed) {  return 0; }
512                                 
513                                 //am I the root for this grouping? if so I want to stop "early"
514                                 //does my sibling have descendants from the users groups? 
515                                 //if so I am not the root
516                                 int parent = t->tree[index].getParent();
517                                 int lc = t->tree[parent].getLChild();
518                                 int rc = t->tree[parent].getRChild();
519                                 
520                                 int sib = lc;
521                                 if (lc == index) { sib = rc; }
522                                 
523                                 map<string, int>::iterator itGroup;
524                                 int pcountSize = 0;
525                                 for (int j = 0; j < grouping.size(); j++) {
526                                         map<string, int>::iterator itGroup = t->tree[sib].pcount.find(grouping[j]);
527                                         if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
528                                 }
529                                 
530                                 //if yes, I am not the root
531                                 if (pcountSize != 0) {
532                                         rootForGrouping[grouping].clear();
533                                         rootForGrouping[grouping].insert(parent);
534                                 }
535                                 
536                                 index = parent; 
537                         }
538                         
539                         //get all nodes above the root to add so we don't add their u values above
540                         index = *(rootForGrouping[grouping].begin());
541                         while(t->tree[index].getParent() != -1){
542                                 int parent = t->tree[index].getParent();
543                                 rootForGrouping[grouping].insert(parent);
544                                 //cout << parent << " in root" << endl;
545                                 index = parent;
546                         }
547                 }
548                 
549                 return 0;
550         }
551         catch(exception& e) {
552                 m->errorOut(e, "Unweighted", "getRoot");
553                 exit(1);
554         }
555 }
556 /**************************************************************************************************/
557
558