]> git.donarmstrong.com Git - mothur.git/blob - unweighted.cpp
added multiple processors option for Windows users to align.seqs, dist.seqs, summary...
[mothur.git] / unweighted.cpp
1 /*
2  *  unweighted.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unweighted.h"
11
12 /**************************************************************************************************/
13
14 EstOutput Unweighted::getValues(Tree* t, int p, string o) {
15         try {
16                 processors = p;
17                 outputDir = o;
18                         
19                 //if the users enters no groups then give them the score of all groups
20                 int numGroups = m->getNumGroups();
21                 
22                 //calculate number of comparsions
23                 int numComp = 0;
24                 vector< vector<string> > namesOfGroupCombos;
25                 for (int r=0; r<numGroups; r++) { 
26                         for (int l = 0; l < r; l++) {
27                                 numComp++;
28                                 vector<string> groups; groups.push_back((m->getGroups())[r]); groups.push_back((m->getGroups())[l]);
29                                 namesOfGroupCombos.push_back(groups);
30                         }
31                 }
32                 
33                 if (numComp != 1) {
34                         vector<string> groups;
35                         if (numGroups == 0) {
36                                 //get score for all users groups
37                                 for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
38                                         if ((tmap->getNamesOfGroups())[i] != "xxx") {
39                                                 groups.push_back((tmap->getNamesOfGroups())[i]);
40                                         }
41                                 }
42                                 namesOfGroupCombos.push_back(groups);
43                         }else {
44                                 for (int i = 0; i < m->getNumGroups(); i++) {
45                                         groups.push_back((m->getGroups())[i]);
46                                 }
47                                 namesOfGroupCombos.push_back(groups);
48                         }
49                 }
50
51                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
52                         if(processors == 1){
53                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
54                         }else{
55                                 int numPairs = namesOfGroupCombos.size();
56                                 
57                                 int numPairsPerProcessor = numPairs / processors;
58                                 
59                                 for (int i = 0; i < processors; i++) {
60                                         int startPos = i * numPairsPerProcessor;
61                         
62                                         if(i == processors - 1){
63                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
64                                         }
65                 
66                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
67                                 }
68                                 data = createProcesses(t, namesOfGroupCombos);
69                                 lines.clear();
70                         }
71                 #else
72                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
73                 #endif
74                 
75                 return data;
76         }
77         catch(exception& e) {
78                 m->errorOut(e, "Unweighted", "getValues");
79                 exit(1);
80         }
81 }
82 /**************************************************************************************************/
83
84 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos) {
85         try {
86 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
87                 int process = 1;
88                 vector<int> processIDS;
89                 
90                 EstOutput results;
91                 
92                 //loop through and create all the processes you want
93                 while (process != processors) {
94                         int pid = fork();
95                         
96                         if (pid > 0) {
97                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
98                                 process++;
99                         }else if (pid == 0){
100                                 EstOutput myresults;
101                                 myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num);
102                                 
103                                 if (m->control_pressed) { exit(0); }
104                                 
105                                 m->mothurOut("Merging results."); m->mothurOutEndLine();
106                                 
107                                 //pass numSeqs to parent
108                                 ofstream out;
109                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
110                                 m->openOutputFile(tempFile, out);
111                                 out << myresults.size() << endl;
112                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
113                                 out.close();
114                                 
115                                 exit(0);
116                         }else { 
117                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
118                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
119                                 exit(0); 
120                         }
121                 }
122                 
123                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num);
124                 
125                 //force parent to wait until all the processes are done
126                 for (int i=0;i<(processors-1);i++) { 
127                         int temp = processIDS[i];
128                         wait(&temp);
129                 }
130                 
131                 if (m->control_pressed) { return results; }
132                 
133                 //get data created by processes
134                 for (int i=0;i<(processors-1);i++) { 
135                         ifstream in;
136                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
137                         m->openInputFile(s, in);
138                         
139                         //get quantiles
140                         if (!in.eof()) {
141                                 int num;
142                                 in >> num; m->gobble(in);
143                                 
144                                 if (m->control_pressed) { break; }
145                                 
146                                 double w; 
147                                 for (int j = 0; j < num; j++) {
148                                         in >> w;
149                                         results.push_back(w);
150                                 }
151                                 m->gobble(in);
152                         }
153                         in.close();
154                         m->mothurRemove(s);
155                 }
156                 
157                 m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine();
158                 
159                 return results;
160 #endif          
161         }
162         catch(exception& e) {
163                 m->errorOut(e, "Unweighted", "createProcesses");
164                 exit(1);
165         }
166 }
167 /**************************************************************************************************/
168 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num) { 
169  try {
170         
171          
172                 EstOutput results; results.resize(num);
173                 
174                 int count = 0;
175                 int total = num;
176                 int twentyPercent = (total * 0.20);
177                 if (twentyPercent == 0) { twentyPercent = 1; }
178          
179                         
180                 for (int h = start; h < (start+num); h++) {
181                                 
182                         if (m->control_pressed) { return results; }
183                 
184                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
185                         double totalBL = 0.00;  //all branch lengths
186                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
187                                                 
188                         //find a node that belongs to one of the groups in this combo
189                         int nodeBelonging = -1;
190                         for (int g = 0; g < namesOfGroupCombos[h].size(); g++) {
191                                 if (t->groupNodeInfo[namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = t->groupNodeInfo[namesOfGroupCombos[h][g]][0]; break; }
192                         }
193                         
194                         //sanity check
195                         if (nodeBelonging == -1) {
196                                 m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping "); 
197                                 for (int g = 0; g < namesOfGroupCombos[h].size()-1; g++) { m->mothurOut(namesOfGroupCombos[h][g] + "-"); }
198                                 m->mothurOut(namesOfGroupCombos[h][namesOfGroupCombos[h].size()-1]);
199                                 m->mothurOut(", skipping."); m->mothurOutEndLine(); results[count] = UW;
200                         }else{
201                                 //cout << "trying to get root" << endl; 
202                                 //if including the root this clears rootForGrouping[namesOfGroupCombos[h]]
203                                 getRoot(t, nodeBelonging, namesOfGroupCombos[h]);
204                                 //cout << "here" << endl;       
205                                 for(int i=0;i<t->getNumNodes();i++){
206                                         
207                                         if (m->control_pressed) {  return data; }
208                                         //cout << i << endl;    
209                                         //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
210                                         //pcountSize = 2, not unique to one group
211                                         //pcountSize = 1, unique to one group
212                                         
213                                         int pcountSize = 0;
214                                         for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
215                                                 map<string, int>::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]);
216                                                 if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
217                                         }
218                                         
219                                         
220                                         //unique calc
221                                         if (pcountSize == 0) { }
222                                         else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root 
223                                                 UniqueBL += abs(t->tree[i].getBranchLength()); 
224                                         }
225                                                 
226                                         //total calc
227                                         if (pcountSize == 0) { }
228                                         else if ((t->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root 
229                                                 totalBL += abs(t->tree[i].getBranchLength()); 
230                                         }
231                                 }
232         //cout << UniqueBL << '\t' << totalBL << endl;          
233                                 UW = (UniqueBL / totalBL);  
234         
235                                 if (isnan(UW) || isinf(UW)) { UW = 0; }
236         
237                                 results[count] = UW;
238                         }
239                         count++;
240
241                         //report progress
242                         //if((count % twentyPercent) == 0) {    float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine();       }
243                 }
244                 
245                 //report progress
246                 //if((count % twentyPercent) != 0) {    float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine();       }
247                 
248                 return results; 
249         }
250         catch(exception& e) {
251                 m->errorOut(e, "Unweighted", "driver");
252                 exit(1);
253         }
254 }
255 /**************************************************************************************************/
256
257 EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, string o) { 
258  try {
259                 processors = p;
260                 outputDir = o;
261                 
262                 //if the users enters no groups then give them the score of all groups
263                 int numGroups = m->getNumGroups();
264                 
265                 //calculate number of comparsions
266                 int numComp = 0;
267                 vector< vector<string> > namesOfGroupCombos;
268                 for (int r=0; r<numGroups; r++) { 
269                         for (int l = 0; l < r; l++) {
270                                 numComp++;
271                                 vector<string> groups; groups.push_back((m->getGroups())[r]); groups.push_back((m->getGroups())[l]);
272                                 namesOfGroupCombos.push_back(groups);
273                         }
274                 }
275                 
276                 if (numComp != 1) {
277                         vector<string> groups;
278                         if (numGroups == 0) {
279                                 //get score for all users groups
280                                 for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
281                                         if ((tmap->getNamesOfGroups())[i] != "xxx") {
282                                                 groups.push_back((tmap->getNamesOfGroups())[i]);
283                                         }
284                                 }
285                                 namesOfGroupCombos.push_back(groups);
286                         }else {
287                                 for (int i = 0; i < m->getNumGroups(); i++) {
288                                         groups.push_back((m->getGroups())[i]);
289                                 }
290                                 namesOfGroupCombos.push_back(groups);
291                         }
292                 }
293
294                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
295                         if(processors == 1){
296                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true);
297                         }else{
298                                 int numPairs = namesOfGroupCombos.size();
299                                 
300                                 int numPairsPerProcessor = numPairs / processors;
301                                 
302                                 for (int i = 0; i < processors; i++) {
303                                         int startPos = i * numPairsPerProcessor;
304                                         if(i == processors - 1){
305                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
306                                         }
307                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
308                                 }
309                                         
310                                 data = createProcesses(t, namesOfGroupCombos, true);
311                                 
312                                 lines.clear();
313                         }
314                 #else
315                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true);
316                 #endif
317         
318                 return data;
319         }
320         catch(exception& e) {
321                 m->errorOut(e, "Unweighted", "getValues");
322                 exit(1);
323         }
324 }
325 /**************************************************************************************************/
326
327 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, bool usingGroups) {
328         try {
329 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
330                 int process = 1;
331                 vector<int> processIDS;
332                 
333                 EstOutput results;
334                 
335                 //loop through and create all the processes you want
336                 while (process != processors) {
337                         int pid = fork();
338                         
339                         if (pid > 0) {
340                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
341                                 process++;
342                         }else if (pid == 0){
343                                 EstOutput myresults;
344                                 myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, usingGroups);
345                                 
346                                 if (m->control_pressed) { exit(0); }
347                                 
348                                 //pass numSeqs to parent
349                                 ofstream out;
350                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
351                                 m->openOutputFile(tempFile, out);
352                                 out << myresults.size() << endl;
353                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
354                                 out.close();
355                                 
356                                 exit(0);
357                         }else { 
358                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
359                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
360                                 exit(0); 
361                         }
362                 }
363                 
364                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups);
365                 
366                 //force parent to wait until all the processes are done
367                 for (int i=0;i<(processors-1);i++) { 
368                         int temp = processIDS[i];
369                         wait(&temp);
370                 }
371                 
372                 if (m->control_pressed) { return results; }
373                 
374                 //get data created by processes
375                 for (int i=0;i<(processors-1);i++) { 
376                         ifstream in;
377                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
378                         m->openInputFile(s, in);
379                         
380                         //get quantiles
381                         if (!in.eof()) {
382                                 int num;
383                                 in >> num; m->gobble(in);
384                                         
385                                 if (m->control_pressed) { break; }
386                                 
387                                 double w; 
388                                 for (int j = 0; j < num; j++) {
389                                         in >> w;
390                                         
391                                         results.push_back(w);
392                                 }
393                                 m->gobble(in);
394                         }
395                         in.close();
396                         m->mothurRemove(s);
397                 }
398                 
399                 return results;
400 #endif          
401         }
402         catch(exception& e) {
403                 m->errorOut(e, "Unweighted", "createProcesses");
404                 exit(1);
405         }
406 }
407 /**************************************************************************************************/
408 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, bool usingGroups) { 
409  try {
410                 
411                 EstOutput results; results.resize(num);
412                 
413                 int count = 0;
414                 
415                 Tree* copyTree = new Tree(tmap);
416                 
417                 for (int h = start; h < (start+num); h++) {
418                 
419                         if (m->control_pressed) { return results; }
420                 
421                         //copy random tree passed in
422                         copyTree->getCopy(t);
423                                 
424                         //swap labels in the groups you want to compare
425                         copyTree->assembleRandomUnifracTree(namesOfGroupCombos[h]);
426                         
427                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
428                         double totalBL = 0.00;  //all branch lengths
429                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
430                         //find a node that belongs to one of the groups in this combo
431                         int nodeBelonging = -1;
432                         for (int g = 0; g < namesOfGroupCombos[h].size(); g++) {
433                                 if (copyTree->groupNodeInfo[namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = copyTree->groupNodeInfo[namesOfGroupCombos[h][g]][0]; break; }
434                         }
435                         
436                         //sanity check
437                         if (nodeBelonging == -1) {
438                                 m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping "); 
439                                 for (int g = 0; g < namesOfGroupCombos[h].size()-1; g++) { m->mothurOut(namesOfGroupCombos[h][g] + "-"); }
440                                 m->mothurOut(namesOfGroupCombos[h][namesOfGroupCombos[h].size()-1]);
441                                 m->mothurOut(", skipping."); m->mothurOutEndLine(); results[count] = UW;
442                         }else{
443                                 
444                                 //if including the root this clears rootForGrouping[namesOfGroupCombos[h]]
445                                 getRoot(copyTree, nodeBelonging, namesOfGroupCombos[h]);
446                                 
447                                 for(int i=0;i<copyTree->getNumNodes();i++){
448                                         
449                                         if (m->control_pressed) {  return data; }
450                                         
451                                         //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
452                                         //pcountSize = 2, not unique to one group
453                                         //pcountSize = 1, unique to one group
454                                         
455                                         int pcountSize = 0;
456                                         for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
457                                                 map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(namesOfGroupCombos[h][j]);
458                                                 if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
459                                         }
460                                         
461                                         //unique calc
462                                         if (pcountSize == 0) { }
463                                         else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root 
464                                                 UniqueBL += abs(copyTree->tree[i].getBranchLength()); 
465                                         }
466                                         
467                                         //total calc
468                                         if (pcountSize == 0) { }
469                                         else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root 
470                                                 totalBL += abs(copyTree->tree[i].getBranchLength()); 
471                                         }
472                                         
473                                 }
474                                 //cout << UniqueBL << '\t' << totalBL << endl;          
475                                 UW = (UniqueBL / totalBL);  
476                                 
477                                 if (isnan(UW) || isinf(UW)) { UW = 0; }
478                                 
479                                 results[count] = UW;
480                         }
481                         count++;
482                         
483                 }
484                 
485                 delete copyTree;
486                 
487                 return results; 
488         }
489         catch(exception& e) {
490                 m->errorOut(e, "Unweighted", "driver");
491                 exit(1);
492         }
493 }
494 /**************************************************************************************************/
495 int Unweighted::getRoot(Tree* t, int v, vector<string> grouping) { 
496         try {
497                 //you are a leaf so get your parent
498                 int index = t->tree[v].getParent();
499                 
500                 if (includeRoot) { 
501                         rootForGrouping[grouping].clear();
502                 }else {
503                         
504                         //my parent is a potential root
505                         rootForGrouping[grouping].insert(index);
506                         
507                         //while you aren't at root
508                         while(t->tree[index].getParent() != -1){
509                                 //cout << index << endl;        
510                                 if (m->control_pressed) {  return 0; }
511                                 
512                                 //am I the root for this grouping? if so I want to stop "early"
513                                 //does my sibling have descendants from the users groups? 
514                                 //if so I am not the root
515                                 int parent = t->tree[index].getParent();
516                                 int lc = t->tree[parent].getLChild();
517                                 int rc = t->tree[parent].getRChild();
518                                 
519                                 int sib = lc;
520                                 if (lc == index) { sib = rc; }
521                                 
522                                 map<string, int>::iterator itGroup;
523                                 int pcountSize = 0;
524                                 for (int j = 0; j < grouping.size(); j++) {
525                                         map<string, int>::iterator itGroup = t->tree[sib].pcount.find(grouping[j]);
526                                         if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
527                                 }
528                                 
529                                 //if yes, I am not the root
530                                 if (pcountSize != 0) {
531                                         rootForGrouping[grouping].clear();
532                                         rootForGrouping[grouping].insert(parent);
533                                 }
534                                 
535                                 index = parent; 
536                         }
537                         
538                         //get all nodes above the root to add so we don't add their u values above
539                         index = *(rootForGrouping[grouping].begin());
540                         while(t->tree[index].getParent() != -1){
541                                 int parent = t->tree[index].getParent();
542                                 rootForGrouping[grouping].insert(parent);
543                                 //cout << parent << " in root" << endl;
544                                 index = parent;
545                         }
546                 }
547                 
548                 return 0;
549         }
550         catch(exception& e) {
551                 m->errorOut(e, "Unweighted", "getRoot");
552                 exit(1);
553         }
554 }
555 /**************************************************************************************************/
556
557