]> git.donarmstrong.com Git - mothur.git/blob - unweighted.cpp
added modify names parameter to set.dir
[mothur.git] / unweighted.cpp
1 /*
2  *  unweighted.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unweighted.h"
11
12 /**************************************************************************************************/
13
14 EstOutput Unweighted::getValues(Tree* t, int p, string o) {
15         try {
16                 processors = p;
17                 outputDir = o;
18         
19         CountTable* ct = t->getCountTable();
20         
21                 //if the users enters no groups then give them the score of all groups
22                 int numGroups = m->getNumGroups();
23                 
24                 //calculate number of comparsions
25                 int numComp = 0;
26                 vector< vector<string> > namesOfGroupCombos;
27                 for (int r=0; r<numGroups; r++) { 
28                         for (int l = 0; l < r; l++) {
29                                 numComp++;
30                                 vector<string> groups; groups.push_back((m->getGroups())[r]); groups.push_back((m->getGroups())[l]);
31                                 namesOfGroupCombos.push_back(groups);
32                         }
33                 }
34                 
35                 if (numComp != 1) {
36                         vector<string> groups;
37                         if (numGroups == 0) {
38                                 //get score for all users groups
39                                 for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) {
40                                         if ((ct->getNamesOfGroups())[i] != "xxx") {
41                                                 groups.push_back((ct->getNamesOfGroups())[i]);
42                                         }
43                                 }
44                                 namesOfGroupCombos.push_back(groups);
45                         }else {
46                                 for (int i = 0; i < m->getNumGroups(); i++) {
47                                         groups.push_back((m->getGroups())[i]);
48                                 }
49                                 namesOfGroupCombos.push_back(groups);
50                         }
51                 }
52         
53         lines.clear();
54         int numPairs = namesOfGroupCombos.size();
55         int numPairsPerProcessor = numPairs / processors;
56         
57         for (int i = 0; i < processors; i++) {
58             int startPos = i * numPairsPerProcessor;
59             if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; }
60             lines.push_back(linePair(startPos, numPairsPerProcessor));
61         }
62         
63         data = createProcesses(t, namesOfGroupCombos, ct);
64
65         lines.clear();
66         
67                 return data;
68         }
69         catch(exception& e) {
70                 m->errorOut(e, "Unweighted", "getValues");
71                 exit(1);
72         }
73 }
74 /**************************************************************************************************/
75
76 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, CountTable* ct) {
77         try {
78         int process = 1;
79                 vector<int> processIDS;
80                 
81                 EstOutput results;
82 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
83                 
84                 
85                 //loop through and create all the processes you want
86                 while (process != processors) {
87                         int pid = fork();
88                         
89                         if (pid > 0) {
90                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
91                                 process++;
92                         }else if (pid == 0){
93                                 EstOutput myresults;
94                                 myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, ct);
95                                 
96                                 if (m->control_pressed) { exit(0); }
97                                 
98                                 //m->mothurOut("Merging results."); m->mothurOutEndLine();
99                                 
100                                 //pass numSeqs to parent
101                                 ofstream out;
102                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
103                                 m->openOutputFile(tempFile, out);
104                                 out << myresults.size() << endl;
105                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
106                                 out.close();
107                                 
108                                 exit(0);
109                         }else { 
110                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
111                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
112                                 exit(0); 
113                         }
114                 }
115                 
116                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct);
117                 
118                 //force parent to wait until all the processes are done
119                 for (int i=0;i<(processors-1);i++) { 
120                         int temp = processIDS[i];
121                         wait(&temp);
122                 }
123                 
124                 if (m->control_pressed) { return results; }
125                 
126                 //get data created by processes
127                 for (int i=0;i<(processors-1);i++) { 
128                         ifstream in;
129                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
130                         m->openInputFile(s, in);
131                         
132                         //get quantiles
133                         if (!in.eof()) {
134                                 int num;
135                                 in >> num; m->gobble(in);
136                                 
137                                 if (m->control_pressed) { break; }
138                                 
139                                 double w; 
140                                 for (int j = 0; j < num; j++) {
141                                         in >> w;
142                                         results.push_back(w);
143                                 }
144                                 m->gobble(in);
145                         }
146                         in.close();
147                         m->mothurRemove(s);
148                 }
149 #else
150                 //fill in functions
151         vector<unweightedData*> pDataArray;
152                 DWORD   dwThreadIdArray[processors-1];
153                 HANDLE  hThreadArray[processors-1];
154         vector<CountTable*> cts;
155         vector<Tree*> trees;
156                 
157                 //Create processor worker threads.
158                 for( int i=1; i<processors; i++ ){
159             CountTable* copyCount = new CountTable();
160             copyCount->copy(ct);
161             Tree* copyTree = new Tree(copyCount);
162             copyTree->getCopy(t);
163             
164             cts.push_back(copyCount);
165             trees.push_back(copyTree);
166             
167             unweightedData* tempweighted = new unweightedData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount, includeRoot);
168                         pDataArray.push_back(tempweighted);
169                         processIDS.push_back(i);
170             
171                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUnWeightedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
172                 }
173                 
174                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct);
175                 
176                 //Wait until all threads have terminated.
177                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
178                 
179                 //Close all thread handles and free memory allocations.
180                 for(int i=0; i < pDataArray.size(); i++){
181             for (int j = 0; j < pDataArray[i]->results.size(); j++) {  results.push_back(pDataArray[i]->results[j]);  }
182                         delete cts[i];
183             delete trees[i];
184                         CloseHandle(hThreadArray[i]);
185                         delete pDataArray[i];
186                 }
187
188 #endif  
189         return results;
190         }
191         catch(exception& e) {
192                 m->errorOut(e, "Unweighted", "createProcesses");
193                 exit(1);
194         }
195 }
196 /**************************************************************************************************/
197 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, CountTable* ct) { 
198  try {
199         
200          
201                 EstOutput results; results.resize(num);
202                 
203                 int count = 0;
204                 int total = num;
205                                         
206                 for (int h = start; h < (start+num); h++) {
207                                 
208                         if (m->control_pressed) { return results; }
209                 
210                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
211                         double totalBL = 0.00;  //all branch lengths
212                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
213                                                 
214                         //find a node that belongs to one of the groups in this combo
215                         int nodeBelonging = -1;
216                         for (int g = 0; g < namesOfGroupCombos[h].size(); g++) {
217                                 if (t->groupNodeInfo[namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = t->groupNodeInfo[namesOfGroupCombos[h][g]][0]; break; }
218                         }
219                         
220                         //sanity check
221                         if (nodeBelonging == -1) {
222                                 m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping "); 
223                                 for (int g = 0; g < namesOfGroupCombos[h].size()-1; g++) { m->mothurOut(namesOfGroupCombos[h][g] + "-"); }
224                                 m->mothurOut(namesOfGroupCombos[h][namesOfGroupCombos[h].size()-1]);
225                                 m->mothurOut(", skipping."); m->mothurOutEndLine(); results[count] = UW;
226                         }else{
227                                 //cout << "trying to get root" << endl; 
228                                 //if including the root this clears rootForGrouping[namesOfGroupCombos[h]]
229                                 getRoot(t, nodeBelonging, namesOfGroupCombos[h]);
230                                 //cout << "here" << endl;       
231                                 for(int i=0;i<t->getNumNodes();i++){
232                                         
233                                         if (m->control_pressed) {  return data; }
234                                         //cout << i << endl;    
235                                         //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
236                                         //pcountSize = 2, not unique to one group
237                                         //pcountSize = 1, unique to one group
238                                         
239                                         int pcountSize = 0;
240                                         for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
241                                                 map<string, int>::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]);
242                                                 if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
243                                         }
244                                         
245                                         
246                                         //unique calc
247                                         if (pcountSize == 0) { }
248                                         else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root 
249                                                 UniqueBL += abs(t->tree[i].getBranchLength()); 
250                                         }
251                                                 
252                                         //total calc
253                                         if (pcountSize == 0) { }
254                                         else if ((t->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root 
255                                                 totalBL += abs(t->tree[i].getBranchLength()); 
256                                         }
257                                 }
258         //cout << UniqueBL << '\t' << totalBL << endl;          
259                                 UW = (UniqueBL / totalBL);  
260         
261                                 if (isnan(UW) || isinf(UW)) { UW = 0; }
262         
263                                 results[count] = UW;
264                         }
265                         count++;
266
267         }
268                 
269                 return results; 
270         }
271         catch(exception& e) {
272                 m->errorOut(e, "Unweighted", "driver");
273                 exit(1);
274         }
275 }
276 /**************************************************************************************************/
277
278 EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, string o) { 
279  try {
280                 processors = p;
281                 outputDir = o;
282                 
283         CountTable* ct = t->getCountTable();
284      
285                 //if the users enters no groups then give them the score of all groups
286                 int numGroups = m->getNumGroups();
287                 
288                 //calculate number of comparsions
289                 int numComp = 0;
290                 vector< vector<string> > namesOfGroupCombos;
291                 for (int r=0; r<numGroups; r++) { 
292                         for (int l = 0; l < r; l++) {
293                                 numComp++;
294                                 vector<string> groups; groups.push_back((m->getGroups())[r]); groups.push_back((m->getGroups())[l]);
295                                 namesOfGroupCombos.push_back(groups);
296                         }
297                 }
298                 
299                 if (numComp != 1) {
300                         vector<string> groups;
301                         if (numGroups == 0) {
302                                 //get score for all users groups
303                                 for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) {
304                                         if ((ct->getNamesOfGroups())[i] != "xxx") {
305                                                 groups.push_back((ct->getNamesOfGroups())[i]);
306                                         }
307                                 }
308                                 namesOfGroupCombos.push_back(groups);
309                         }else {
310                                 for (int i = 0; i < m->getNumGroups(); i++) {
311                                         groups.push_back((m->getGroups())[i]);
312                                 }
313                                 namesOfGroupCombos.push_back(groups);
314                         }
315                 }
316      
317         lines.clear();
318         int numPairs = namesOfGroupCombos.size();
319         int numPairsPerProcessor = numPairs / processors;
320      
321         for (int i = 0; i < processors; i++) {
322             int startPos = i * numPairsPerProcessor;
323             if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; }
324             lines.push_back(linePair(startPos, numPairsPerProcessor));
325         }
326      
327         data = createProcesses(t, namesOfGroupCombos, true, ct);
328         lines.clear();
329
330                 return data;
331         }
332         catch(exception& e) {
333                 m->errorOut(e, "Unweighted", "getValues");
334                 exit(1);
335         }
336 }
337 /**************************************************************************************************/
338
339 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, bool usingGroups, CountTable* ct) {
340         try {
341         int process = 1;
342                 vector<int> processIDS;
343                 
344                 EstOutput results;
345 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
346
347                 //loop through and create all the processes you want
348                 while (process != processors) {
349                         int pid = fork();
350                         
351                         if (pid > 0) {
352                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
353                                 process++;
354                         }else if (pid == 0){
355                                 EstOutput myresults;
356                                 myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, usingGroups, ct);
357                                 
358                                 if (m->control_pressed) { exit(0); }
359                                 
360                                 //pass numSeqs to parent
361                                 ofstream out;
362                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
363                                 m->openOutputFile(tempFile, out);
364                                 out << myresults.size() << endl;
365                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
366                                 out.close();
367                                 
368                                 exit(0);
369                         }else { 
370                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
371                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
372                                 exit(0); 
373                         }
374                 }
375                 
376                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups, ct);
377                 
378                 //force parent to wait until all the processes are done
379                 for (int i=0;i<(processors-1);i++) { 
380                         int temp = processIDS[i];
381                         wait(&temp);
382                 }
383                 
384                 if (m->control_pressed) { return results; }
385                 
386                 //get data created by processes
387                 for (int i=0;i<(processors-1);i++) { 
388                         ifstream in;
389                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
390                         m->openInputFile(s, in);
391                         
392                         //get quantiles
393                         if (!in.eof()) {
394                                 int num;
395                                 in >> num; m->gobble(in);
396                                         
397                                 if (m->control_pressed) { break; }
398                                 
399                                 double w; 
400                                 for (int j = 0; j < num; j++) {
401                                         in >> w;
402                                         
403                                         results.push_back(w);
404                                 }
405                                 m->gobble(in);
406                         }
407                         in.close();
408                         m->mothurRemove(s);
409                 }
410 #else
411        //for some reason it doesn't seem to be calculating hte random trees scores.  all scores are the same even though copytree appears to be randomized.
412         
413         /*
414         //fill in functions
415         vector<unweightedData*> pDataArray;
416                 DWORD   dwThreadIdArray[processors-1];
417                 HANDLE  hThreadArray[processors-1];
418         vector<CountTable*> cts;
419         vector<Tree*> trees;
420                 
421                 //Create processor worker threads.
422                 for( int i=1; i<processors; i++ ){
423             CountTable* copyCount = new CountTable();
424             copyCount->copy(ct);
425             Tree* copyTree = new Tree(copyCount);
426             copyTree->getCopy(t);
427             
428             cts.push_back(copyCount);
429             trees.push_back(copyTree);
430             
431             unweightedData* tempweighted = new unweightedData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount, includeRoot);
432                         pDataArray.push_back(tempweighted);
433                         processIDS.push_back(i);
434             
435                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUnWeightedRandomThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
436                 }
437                 
438                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups, ct);
439                 
440                 //Wait until all threads have terminated.
441                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
442                 
443                 //Close all thread handles and free memory allocations.
444                 for(int i=0; i < pDataArray.size(); i++){
445             for (int j = 0; j < pDataArray[i]->results.size(); j++) {  results.push_back(pDataArray[i]->results[j]);  }
446                         delete cts[i];
447             delete trees[i];
448                         CloseHandle(hThreadArray[i]);
449                         delete pDataArray[i];
450                 }       */
451         
452         results = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), usingGroups, ct);
453 #endif
454         return results;
455         }
456         catch(exception& e) {
457                 m->errorOut(e, "Unweighted", "createProcesses");
458                 exit(1);
459         }
460 }
461 /**************************************************************************************************/
462 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, bool usingGroups, CountTable* ct) { 
463  try {
464                 
465                 EstOutput results; results.resize(num);
466                 
467                 int count = 0;
468                 
469                 Tree* copyTree = new Tree(ct);
470                 
471                 for (int h = start; h < (start+num); h++) {
472                 
473                         if (m->control_pressed) { return results; }
474                 
475                         //copy random tree passed in
476                         copyTree->getCopy(t);
477                                 
478                         //swap labels in the groups you want to compare
479                         copyTree->assembleRandomUnifracTree(namesOfGroupCombos[h]);
480                         
481                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
482                         double totalBL = 0.00;  //all branch lengths
483                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
484                         //find a node that belongs to one of the groups in this combo
485                         int nodeBelonging = -1;
486                         for (int g = 0; g < namesOfGroupCombos[h].size(); g++) {
487                                 if (copyTree->groupNodeInfo[namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = copyTree->groupNodeInfo[namesOfGroupCombos[h][g]][0]; break; }
488                         }
489                         
490                         //sanity check
491                         if (nodeBelonging == -1) {
492                                 m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping "); 
493                                 for (int g = 0; g < namesOfGroupCombos[h].size()-1; g++) { m->mothurOut(namesOfGroupCombos[h][g] + "-"); }
494                                 m->mothurOut(namesOfGroupCombos[h][namesOfGroupCombos[h].size()-1]);
495                                 m->mothurOut(", skipping."); m->mothurOutEndLine(); results[count] = UW;
496                         }else{
497                                 
498                                 //if including the root this clears rootForGrouping[namesOfGroupCombos[h]]
499                                 getRoot(copyTree, nodeBelonging, namesOfGroupCombos[h]);
500                                 
501                                 for(int i=0;i<copyTree->getNumNodes();i++){
502                                         
503                                         if (m->control_pressed) {  return data; }
504                                         
505                                         //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
506                                         //pcountSize = 2, not unique to one group
507                                         //pcountSize = 1, unique to one group
508                                         
509                                         int pcountSize = 0;
510                                         for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
511                                                 map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(namesOfGroupCombos[h][j]);
512                                                 if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
513                                         }
514                                         
515                                         //unique calc
516                                         if (pcountSize == 0) { }
517                                         else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root 
518                                                 UniqueBL += abs(copyTree->tree[i].getBranchLength()); 
519                                         }
520                                         
521                                         //total calc
522                                         if (pcountSize == 0) { }
523                                         else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root 
524                                                 totalBL += abs(copyTree->tree[i].getBranchLength()); 
525                                         }
526                                         
527                                 }
528                                 //cout << UniqueBL << '\t' << totalBL << endl;          
529                                 UW = (UniqueBL / totalBL);  
530                                 
531                                 if (isnan(UW) || isinf(UW)) { UW = 0; }
532                                 
533                                 results[count] = UW;
534             }
535                         count++;
536                         
537                 }
538                 
539                 delete copyTree;
540                 
541                 return results; 
542         }
543         catch(exception& e) {
544                 m->errorOut(e, "Unweighted", "driver");
545                 exit(1);
546         }
547 }
548 /**************************************************************************************************/
549 int Unweighted::getRoot(Tree* t, int v, vector<string> grouping) { 
550         try {
551                 //you are a leaf so get your parent
552                 int index = t->tree[v].getParent();
553                 
554                 if (includeRoot) { 
555                         rootForGrouping[grouping].clear();
556                 }else {
557                         
558                         //my parent is a potential root
559                         rootForGrouping[grouping].insert(index);
560                         
561                         //while you aren't at root
562                         while(t->tree[index].getParent() != -1){
563                                 //cout << index << endl;        
564                                 if (m->control_pressed) {  return 0; }
565                                 
566                                 //am I the root for this grouping? if so I want to stop "early"
567                                 //does my sibling have descendants from the users groups? 
568                                 //if so I am not the root
569                                 int parent = t->tree[index].getParent();
570                                 int lc = t->tree[parent].getLChild();
571                                 int rc = t->tree[parent].getRChild();
572                                 
573                                 int sib = lc;
574                                 if (lc == index) { sib = rc; }
575                                 
576                                 map<string, int>::iterator itGroup;
577                                 int pcountSize = 0;
578                                 for (int j = 0; j < grouping.size(); j++) {
579                                         map<string, int>::iterator itGroup = t->tree[sib].pcount.find(grouping[j]);
580                                         if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
581                                 }
582                                 
583                                 //if yes, I am not the root
584                                 if (pcountSize != 0) {
585                                         rootForGrouping[grouping].clear();
586                                         rootForGrouping[grouping].insert(parent);
587                                 }
588                                 
589                                 index = parent; 
590                         }
591                         
592                         //get all nodes above the root to add so we don't add their u values above
593                         index = *(rootForGrouping[grouping].begin());
594                         while(t->tree[index].getParent() != -1){
595                                 int parent = t->tree[index].getParent();
596                                 rootForGrouping[grouping].insert(parent);
597                                 //cout << parent << " in root" << endl;
598                                 index = parent;
599                         }
600                 }
601                 
602                 return 0;
603         }
604         catch(exception& e) {
605                 m->errorOut(e, "Unweighted", "getRoot");
606                 exit(1);
607         }
608 }
609 /**************************************************************************************************/
610
611