]> git.donarmstrong.com Git - mothur.git/blob - unweighted.cpp
fixes while testing 1.33.0
[mothur.git] / unweighted.cpp
1 /*
2  *  unweighted.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unweighted.h"
11
12 /**************************************************************************************************/
13
14 EstOutput Unweighted::getValues(Tree* t, int p, string o) {
15         try {
16                 processors = p;
17                 outputDir = o;
18         
19         CountTable* ct = t->getCountTable();
20         
21                 //if the users enters no groups then give them the score of all groups
22                 int numGroups = m->getNumGroups();
23                 
24                 //calculate number of comparsions
25                 int numComp = 0;
26                 vector< vector<string> > namesOfGroupCombos;
27                 for (int r=0; r<numGroups; r++) { 
28                         for (int l = 0; l < r; l++) {
29                                 numComp++;
30                                 vector<string> groups; groups.push_back((m->getGroups())[r]); groups.push_back((m->getGroups())[l]);
31                                 namesOfGroupCombos.push_back(groups);
32                         }
33                 }
34                 
35                 if (numComp != 1) {
36                         vector<string> groups;
37                         if (numGroups == 0) {
38                                 //get score for all users groups
39                                 for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) {
40                                         if ((ct->getNamesOfGroups())[i] != "xxx") {
41                                                 groups.push_back((ct->getNamesOfGroups())[i]);
42                                         }
43                                 }
44                                 namesOfGroupCombos.push_back(groups);
45                         }else {
46                                 for (int i = 0; i < m->getNumGroups(); i++) {
47                                         groups.push_back((m->getGroups())[i]);
48                                 }
49                                 namesOfGroupCombos.push_back(groups);
50                         }
51                 }
52         
53         lines.clear();
54         int remainingPairs = namesOfGroupCombos.size();
55         int startIndex = 0;
56         for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
57             int numPairs = remainingPairs; //case for last processor
58             if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); }
59             lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs
60             startIndex = startIndex + numPairs;
61             remainingPairs = remainingPairs - numPairs;
62         }
63         
64         data = createProcesses(t, namesOfGroupCombos, ct);
65
66         lines.clear();
67         
68                 return data;
69         }
70         catch(exception& e) {
71                 m->errorOut(e, "Unweighted", "getValues");
72                 exit(1);
73         }
74 }
75 /**************************************************************************************************/
76
77 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, CountTable* ct) {
78         try {
79         int process = 1;
80                 vector<int> processIDS;
81                 
82                 EstOutput results;
83 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
84                 
85                 
86                 //loop through and create all the processes you want
87                 while (process != processors) {
88                         int pid = fork();
89                         
90                         if (pid > 0) {
91                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
92                                 process++;
93                         }else if (pid == 0){
94                                 EstOutput myresults;
95                                 myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, ct);
96                                 
97                                 if (m->control_pressed) { exit(0); }
98                                 
99                                 //m->mothurOut("Merging results."); m->mothurOutEndLine();
100                                 
101                                 //pass numSeqs to parent
102                                 ofstream out;
103                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
104                                 m->openOutputFile(tempFile, out);
105                                 out << myresults.size() << endl;
106                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
107                                 out.close();
108                                 
109                                 exit(0);
110                         }else { 
111                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
112                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
113                                 exit(0); 
114                         }
115                 }
116                 
117                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct);
118                 
119                 //force parent to wait until all the processes are done
120                 for (int i=0;i<(processors-1);i++) { 
121                         int temp = processIDS[i];
122                         wait(&temp);
123                 }
124                 
125                 if (m->control_pressed) { return results; }
126                 
127                 //get data created by processes
128                 for (int i=0;i<(processors-1);i++) { 
129                         ifstream in;
130                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
131                         m->openInputFile(s, in);
132                         
133                         //get quantiles
134                         if (!in.eof()) {
135                                 int num;
136                                 in >> num; m->gobble(in);
137                                 
138                                 if (m->control_pressed) { break; }
139                                 
140                                 double w; 
141                                 for (int j = 0; j < num; j++) {
142                                         in >> w;
143                                         results.push_back(w);
144                                 }
145                                 m->gobble(in);
146                         }
147                         in.close();
148                         m->mothurRemove(s);
149                 }
150 #else
151                 //fill in functions
152         vector<unweightedData*> pDataArray;
153                 DWORD   dwThreadIdArray[processors-1];
154                 HANDLE  hThreadArray[processors-1];
155         vector<CountTable*> cts;
156         vector<Tree*> trees;
157                 
158                 //Create processor worker threads.
159                 for( int i=1; i<processors; i++ ){
160             CountTable* copyCount = new CountTable();
161             copyCount->copy(ct);
162             Tree* copyTree = new Tree(copyCount);
163             copyTree->getCopy(t);
164             
165             cts.push_back(copyCount);
166             trees.push_back(copyTree);
167             
168             unweightedData* tempweighted = new unweightedData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount, includeRoot);
169                         pDataArray.push_back(tempweighted);
170                         processIDS.push_back(i);
171             
172                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUnWeightedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
173                 }
174                 
175                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct);
176                 
177                 //Wait until all threads have terminated.
178                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
179                 
180                 //Close all thread handles and free memory allocations.
181                 for(int i=0; i < pDataArray.size(); i++){
182             for (int j = 0; j < pDataArray[i]->results.size(); j++) {  results.push_back(pDataArray[i]->results[j]);  }
183                         delete cts[i];
184             delete trees[i];
185                         CloseHandle(hThreadArray[i]);
186                         delete pDataArray[i];
187                 }
188
189 #endif  
190         return results;
191         }
192         catch(exception& e) {
193                 m->errorOut(e, "Unweighted", "createProcesses");
194                 exit(1);
195         }
196 }
197 /**************************************************************************************************/
198 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, CountTable* ct) { 
199  try {
200         
201          
202                 EstOutput results; results.resize(num);
203                 
204                 int count = 0;
205                 int total = num;
206                                         
207                 for (int h = start; h < (start+num); h++) {
208                                 
209                         if (m->control_pressed) { return results; }
210                 
211                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
212                         double totalBL = 0.00;  //all branch lengths
213                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
214                                                 
215                         //find a node that belongs to one of the groups in this combo
216                         int nodeBelonging = -1;
217                         for (int g = 0; g < namesOfGroupCombos[h].size(); g++) {
218                                 if (t->groupNodeInfo[namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = t->groupNodeInfo[namesOfGroupCombos[h][g]][0]; break; }
219                         }
220                         
221                         //sanity check
222                         if (nodeBelonging == -1) {
223                                 m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping "); 
224                                 for (int g = 0; g < namesOfGroupCombos[h].size()-1; g++) { m->mothurOut(namesOfGroupCombos[h][g] + "-"); }
225                                 m->mothurOut(namesOfGroupCombos[h][namesOfGroupCombos[h].size()-1]);
226                                 m->mothurOut(", skipping."); m->mothurOutEndLine(); results[count] = UW;
227                         }else{
228                                 //cout << "trying to get root" << endl; 
229                                 //if including the root this clears rootForGrouping[namesOfGroupCombos[h]]
230                                 getRoot(t, nodeBelonging, namesOfGroupCombos[h]);
231                                 //cout << "here" << endl;       
232                                 for(int i=0;i<t->getNumNodes();i++){
233                                         
234                                         if (m->control_pressed) {  return data; }
235                                         //cout << i << endl;    
236                                         //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
237                                         //pcountSize = 2, not unique to one group
238                                         //pcountSize = 1, unique to one group
239                                         
240                                         int pcountSize = 0;
241                                         for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
242                                                 map<string, int>::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]);
243                                                 if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
244                                         }
245                                         
246                                         
247                                         //unique calc
248                                         if (pcountSize == 0) { }
249                                         else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root 
250                                                 UniqueBL += abs(t->tree[i].getBranchLength()); 
251                                         }
252                                                 
253                                         //total calc
254                                         if (pcountSize == 0) { }
255                                         else if ((t->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root 
256                                                 totalBL += abs(t->tree[i].getBranchLength()); 
257                                         }
258                                 }
259         //cout << UniqueBL << '\t' << totalBL << endl;          
260                                 UW = (UniqueBL / totalBL);  
261         
262                                 if (isnan(UW) || isinf(UW)) { UW = 0; }
263         
264                                 results[count] = UW;
265                         }
266                         count++;
267
268         }
269                 
270                 return results; 
271         }
272         catch(exception& e) {
273                 m->errorOut(e, "Unweighted", "driver");
274                 exit(1);
275         }
276 }
277 /**************************************************************************************************/
278
279 EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, string o) { 
280  try {
281                 processors = p;
282                 outputDir = o;
283                 
284         CountTable* ct = t->getCountTable();
285      
286                 //if the users enters no groups then give them the score of all groups
287                 int numGroups = m->getNumGroups();
288                 
289                 //calculate number of comparsions
290                 int numComp = 0;
291                 vector< vector<string> > namesOfGroupCombos;
292                 for (int r=0; r<numGroups; r++) { 
293                         for (int l = 0; l < r; l++) {
294                                 numComp++;
295                                 vector<string> groups; groups.push_back((m->getGroups())[r]); groups.push_back((m->getGroups())[l]);
296                                 namesOfGroupCombos.push_back(groups);
297                         }
298                 }
299                 
300                 if (numComp != 1) {
301                         vector<string> groups;
302                         if (numGroups == 0) {
303                                 //get score for all users groups
304                                 for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) {
305                                         if ((ct->getNamesOfGroups())[i] != "xxx") {
306                                                 groups.push_back((ct->getNamesOfGroups())[i]);
307                                         }
308                                 }
309                                 namesOfGroupCombos.push_back(groups);
310                         }else {
311                                 for (int i = 0; i < m->getNumGroups(); i++) {
312                                         groups.push_back((m->getGroups())[i]);
313                                 }
314                                 namesOfGroupCombos.push_back(groups);
315                         }
316                 }
317      
318         lines.clear();
319         int numPairs = namesOfGroupCombos.size();
320         int numPairsPerProcessor = ceil(numPairs / processors);
321      
322         for (int i = 0; i < processors; i++) {
323             int startPos = i * numPairsPerProcessor;
324             if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; }
325             lines.push_back(linePair(startPos, numPairsPerProcessor));
326         }
327      
328         data = createProcesses(t, namesOfGroupCombos, true, ct);
329         lines.clear();
330
331                 return data;
332         }
333         catch(exception& e) {
334                 m->errorOut(e, "Unweighted", "getValues");
335                 exit(1);
336         }
337 }
338 /**************************************************************************************************/
339
340 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, bool usingGroups, CountTable* ct) {
341         try {
342         int process = 1;
343                 vector<int> processIDS;
344                 
345                 EstOutput results;
346 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
347
348                 //loop through and create all the processes you want
349                 while (process != processors) {
350                         int pid = fork();
351                         
352                         if (pid > 0) {
353                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
354                                 process++;
355                         }else if (pid == 0){
356                                 EstOutput myresults;
357                                 myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, usingGroups, ct);
358                                 
359                                 if (m->control_pressed) { exit(0); }
360                                 
361                                 //pass numSeqs to parent
362                                 ofstream out;
363                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
364                                 m->openOutputFile(tempFile, out);
365                                 out << myresults.size() << endl;
366                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
367                                 out.close();
368                                 
369                                 exit(0);
370                         }else { 
371                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
372                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
373                                 exit(0); 
374                         }
375                 }
376                 
377                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups, ct);
378                 
379                 //force parent to wait until all the processes are done
380                 for (int i=0;i<(processors-1);i++) { 
381                         int temp = processIDS[i];
382                         wait(&temp);
383                 }
384                 
385                 if (m->control_pressed) { return results; }
386                 
387                 //get data created by processes
388                 for (int i=0;i<(processors-1);i++) { 
389                         ifstream in;
390                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
391                         m->openInputFile(s, in);
392                         
393                         //get quantiles
394                         if (!in.eof()) {
395                                 int num;
396                                 in >> num; m->gobble(in);
397                                         
398                                 if (m->control_pressed) { break; }
399                                 
400                                 double w; 
401                                 for (int j = 0; j < num; j++) {
402                                         in >> w;
403                                         
404                                         results.push_back(w);
405                                 }
406                                 m->gobble(in);
407                         }
408                         in.close();
409                         m->mothurRemove(s);
410                 }
411 #else
412        //for some reason it doesn't seem to be calculating hte random trees scores.  all scores are the same even though copytree appears to be randomized.
413         
414         /*
415         //fill in functions
416         vector<unweightedData*> pDataArray;
417                 DWORD   dwThreadIdArray[processors-1];
418                 HANDLE  hThreadArray[processors-1];
419         vector<CountTable*> cts;
420         vector<Tree*> trees;
421                 
422                 //Create processor worker threads.
423                 for( int i=1; i<processors; i++ ){
424             CountTable* copyCount = new CountTable();
425             copyCount->copy(ct);
426             Tree* copyTree = new Tree(copyCount);
427             copyTree->getCopy(t);
428             
429             cts.push_back(copyCount);
430             trees.push_back(copyTree);
431             
432             unweightedData* tempweighted = new unweightedData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount, includeRoot);
433                         pDataArray.push_back(tempweighted);
434                         processIDS.push_back(i);
435             
436                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUnWeightedRandomThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
437                 }
438                 
439                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups, ct);
440                 
441                 //Wait until all threads have terminated.
442                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
443                 
444                 //Close all thread handles and free memory allocations.
445                 for(int i=0; i < pDataArray.size(); i++){
446             for (int j = 0; j < pDataArray[i]->results.size(); j++) {  results.push_back(pDataArray[i]->results[j]);  }
447                         delete cts[i];
448             delete trees[i];
449                         CloseHandle(hThreadArray[i]);
450                         delete pDataArray[i];
451                 }       */
452         
453         results = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), usingGroups, ct);
454 #endif
455         return results;
456         }
457         catch(exception& e) {
458                 m->errorOut(e, "Unweighted", "createProcesses");
459                 exit(1);
460         }
461 }
462 /**************************************************************************************************/
463 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, bool usingGroups, CountTable* ct) { 
464  try {
465                 
466                 EstOutput results; results.resize(num);
467                 
468                 int count = 0;
469                 
470                 Tree* copyTree = new Tree(ct);
471                 
472                 for (int h = start; h < (start+num); h++) {
473                 
474                         if (m->control_pressed) { return results; }
475                 
476                         //copy random tree passed in
477                         copyTree->getCopy(t);
478                                 
479                         //swap labels in the groups you want to compare
480                         copyTree->assembleRandomUnifracTree(namesOfGroupCombos[h]);
481                         
482                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
483                         double totalBL = 0.00;  //all branch lengths
484                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
485                         //find a node that belongs to one of the groups in this combo
486                         int nodeBelonging = -1;
487                         for (int g = 0; g < namesOfGroupCombos[h].size(); g++) {
488                                 if (copyTree->groupNodeInfo[namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = copyTree->groupNodeInfo[namesOfGroupCombos[h][g]][0]; break; }
489                         }
490                         
491                         //sanity check
492                         if (nodeBelonging == -1) {
493                                 m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping "); 
494                                 for (int g = 0; g < namesOfGroupCombos[h].size()-1; g++) { m->mothurOut(namesOfGroupCombos[h][g] + "-"); }
495                                 m->mothurOut(namesOfGroupCombos[h][namesOfGroupCombos[h].size()-1]);
496                                 m->mothurOut(", skipping."); m->mothurOutEndLine(); results[count] = UW;
497                         }else{
498                                 
499                                 //if including the root this clears rootForGrouping[namesOfGroupCombos[h]]
500                                 getRoot(copyTree, nodeBelonging, namesOfGroupCombos[h]);
501                                 
502                                 for(int i=0;i<copyTree->getNumNodes();i++){
503                                         
504                                         if (m->control_pressed) {  return data; }
505                                         
506                                         //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
507                                         //pcountSize = 2, not unique to one group
508                                         //pcountSize = 1, unique to one group
509                                         
510                                         int pcountSize = 0;
511                                         for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
512                                                 map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(namesOfGroupCombos[h][j]);
513                                                 if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
514                                         }
515                                         
516                                         //unique calc
517                                         if (pcountSize == 0) { }
518                                         else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root 
519                                                 UniqueBL += abs(copyTree->tree[i].getBranchLength()); 
520                                         }
521                                         
522                                         //total calc
523                                         if (pcountSize == 0) { }
524                                         else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root 
525                                                 totalBL += abs(copyTree->tree[i].getBranchLength()); 
526                                         }
527                                         
528                                 }
529                                 //cout << UniqueBL << '\t' << totalBL << endl;          
530                                 UW = (UniqueBL / totalBL);  
531                                 
532                                 if (isnan(UW) || isinf(UW)) { UW = 0; }
533                                 
534                                 results[count] = UW;
535             }
536                         count++;
537                         
538                 }
539                 
540                 delete copyTree;
541                 
542                 return results; 
543         }
544         catch(exception& e) {
545                 m->errorOut(e, "Unweighted", "driver");
546                 exit(1);
547         }
548 }
549 /**************************************************************************************************/
550 int Unweighted::getRoot(Tree* t, int v, vector<string> grouping) { 
551         try {
552                 //you are a leaf so get your parent
553                 int index = t->tree[v].getParent();
554                 
555                 if (includeRoot) { 
556                         rootForGrouping[grouping].clear();
557                 }else {
558                         
559                         //my parent is a potential root
560                         rootForGrouping[grouping].insert(index);
561                         
562                         //while you aren't at root
563                         while(t->tree[index].getParent() != -1){
564                                 //cout << index << endl;        
565                                 if (m->control_pressed) {  return 0; }
566                                 
567                                 //am I the root for this grouping? if so I want to stop "early"
568                                 //does my sibling have descendants from the users groups? 
569                                 //if so I am not the root
570                                 int parent = t->tree[index].getParent();
571                                 int lc = t->tree[parent].getLChild();
572                                 int rc = t->tree[parent].getRChild();
573                                 
574                                 int sib = lc;
575                                 if (lc == index) { sib = rc; }
576                                 
577                                 map<string, int>::iterator itGroup;
578                                 int pcountSize = 0;
579                                 for (int j = 0; j < grouping.size(); j++) {
580                                         map<string, int>::iterator itGroup = t->tree[sib].pcount.find(grouping[j]);
581                                         if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
582                                 }
583                                 
584                                 //if yes, I am not the root
585                                 if (pcountSize != 0) {
586                                         rootForGrouping[grouping].clear();
587                                         rootForGrouping[grouping].insert(parent);
588                                 }
589                                 
590                                 index = parent; 
591                         }
592                         
593                         //get all nodes above the root to add so we don't add their u values above
594                         index = *(rootForGrouping[grouping].begin());
595                         while(t->tree[index].getParent() != -1){
596                                 int parent = t->tree[index].getParent();
597                                 rootForGrouping[grouping].insert(parent);
598                                 //cout << parent << " in root" << endl;
599                                 index = parent;
600                         }
601                 }
602                 
603                 return 0;
604         }
605         catch(exception& e) {
606                 m->errorOut(e, "Unweighted", "getRoot");
607                 exit(1);
608         }
609 }
610 /**************************************************************************************************/
611
612