]> git.donarmstrong.com Git - mothur.git/blob - unweighted.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / unweighted.cpp
1 /*
2  *  unweighted.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unweighted.h"
11
12 /**************************************************************************************************/
13
14 EstOutput Unweighted::getValues(Tree* t, int p, string o) {
15         try {
16                 processors = p;
17                 outputDir = o;
18         
19         CountTable* ct = t->getCountTable();
20         
21                 //if the users enters no groups then give them the score of all groups
22                 int numGroups = m->getNumGroups();
23                 
24                 //calculate number of comparsions
25                 int numComp = 0;
26                 vector< vector<string> > namesOfGroupCombos;
27                 for (int r=0; r<numGroups; r++) { 
28                         for (int l = 0; l < r; l++) {
29                                 numComp++;
30                                 vector<string> groups; groups.push_back((m->getGroups())[r]); groups.push_back((m->getGroups())[l]);
31                                 namesOfGroupCombos.push_back(groups);
32                         }
33                 }
34                 
35                 if (numComp != 1) {
36                         vector<string> groups;
37                         if (numGroups == 0) {
38                                 //get score for all users groups
39                                 for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) {
40                                         if ((ct->getNamesOfGroups())[i] != "xxx") {
41                                                 groups.push_back((ct->getNamesOfGroups())[i]);
42                                         }
43                                 }
44                                 namesOfGroupCombos.push_back(groups);
45                         }else {
46                                 for (int i = 0; i < m->getNumGroups(); i++) {
47                                         groups.push_back((m->getGroups())[i]);
48                                 }
49                                 namesOfGroupCombos.push_back(groups);
50                         }
51                 }
52
53                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
54                         if(processors == 1){
55                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
56                         }else{
57                                 int numPairs = namesOfGroupCombos.size();
58                                 
59                                 int numPairsPerProcessor = numPairs / processors;
60                                 
61                                 for (int i = 0; i < processors; i++) {
62                                         int startPos = i * numPairsPerProcessor;
63                         
64                                         if(i == processors - 1){
65                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
66                                         }
67                 
68                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
69                                 }
70                                 data = createProcesses(t, namesOfGroupCombos, ct);
71                                 lines.clear();
72                         }
73                 #else
74                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
75                 #endif
76                 
77                 return data;
78         }
79         catch(exception& e) {
80                 m->errorOut(e, "Unweighted", "getValues");
81                 exit(1);
82         }
83 }
84 /**************************************************************************************************/
85
86 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, CountTable* ct) {
87         try {
88 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
89                 int process = 1;
90                 vector<int> processIDS;
91                 
92                 EstOutput results;
93                 
94                 //loop through and create all the processes you want
95                 while (process != processors) {
96                         int pid = fork();
97                         
98                         if (pid > 0) {
99                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
100                                 process++;
101                         }else if (pid == 0){
102                                 EstOutput myresults;
103                                 myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, ct);
104                                 
105                                 if (m->control_pressed) { exit(0); }
106                                 
107                                 //m->mothurOut("Merging results."); m->mothurOutEndLine();
108                                 
109                                 //pass numSeqs to parent
110                                 ofstream out;
111                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
112                                 m->openOutputFile(tempFile, out);
113                                 out << myresults.size() << endl;
114                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
115                                 out.close();
116                                 
117                                 exit(0);
118                         }else { 
119                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
120                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
121                                 exit(0); 
122                         }
123                 }
124                 
125                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct);
126                 
127                 //force parent to wait until all the processes are done
128                 for (int i=0;i<(processors-1);i++) { 
129                         int temp = processIDS[i];
130                         wait(&temp);
131                 }
132                 
133                 if (m->control_pressed) { return results; }
134                 
135                 //get data created by processes
136                 for (int i=0;i<(processors-1);i++) { 
137                         ifstream in;
138                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
139                         m->openInputFile(s, in);
140                         
141                         //get quantiles
142                         if (!in.eof()) {
143                                 int num;
144                                 in >> num; m->gobble(in);
145                                 
146                                 if (m->control_pressed) { break; }
147                                 
148                                 double w; 
149                                 for (int j = 0; j < num; j++) {
150                                         in >> w;
151                                         results.push_back(w);
152                                 }
153                                 m->gobble(in);
154                         }
155                         in.close();
156                         m->mothurRemove(s);
157                 }
158                 
159                 //m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine();
160                 
161                 return results;
162 #endif          
163         }
164         catch(exception& e) {
165                 m->errorOut(e, "Unweighted", "createProcesses");
166                 exit(1);
167         }
168 }
169 /**************************************************************************************************/
170 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, CountTable* ct) { 
171  try {
172         
173          
174                 EstOutput results; results.resize(num);
175                 
176                 int count = 0;
177                 int total = num;
178                 int twentyPercent = (total * 0.20);
179                 if (twentyPercent == 0) { twentyPercent = 1; }
180          
181                         
182                 for (int h = start; h < (start+num); h++) {
183                                 
184                         if (m->control_pressed) { return results; }
185                 
186                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
187                         double totalBL = 0.00;  //all branch lengths
188                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
189                                                 
190                         //find a node that belongs to one of the groups in this combo
191                         int nodeBelonging = -1;
192                         for (int g = 0; g < namesOfGroupCombos[h].size(); g++) {
193                                 if (t->groupNodeInfo[namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = t->groupNodeInfo[namesOfGroupCombos[h][g]][0]; break; }
194                         }
195                         
196                         //sanity check
197                         if (nodeBelonging == -1) {
198                                 m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping "); 
199                                 for (int g = 0; g < namesOfGroupCombos[h].size()-1; g++) { m->mothurOut(namesOfGroupCombos[h][g] + "-"); }
200                                 m->mothurOut(namesOfGroupCombos[h][namesOfGroupCombos[h].size()-1]);
201                                 m->mothurOut(", skipping."); m->mothurOutEndLine(); results[count] = UW;
202                         }else{
203                                 //cout << "trying to get root" << endl; 
204                                 //if including the root this clears rootForGrouping[namesOfGroupCombos[h]]
205                                 getRoot(t, nodeBelonging, namesOfGroupCombos[h]);
206                                 //cout << "here" << endl;       
207                                 for(int i=0;i<t->getNumNodes();i++){
208                                         
209                                         if (m->control_pressed) {  return data; }
210                                         //cout << i << endl;    
211                                         //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
212                                         //pcountSize = 2, not unique to one group
213                                         //pcountSize = 1, unique to one group
214                                         
215                                         int pcountSize = 0;
216                                         for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
217                                                 map<string, int>::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]);
218                                                 if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
219                                         }
220                                         
221                                         
222                                         //unique calc
223                                         if (pcountSize == 0) { }
224                                         else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root 
225                                                 UniqueBL += abs(t->tree[i].getBranchLength()); 
226                                         }
227                                                 
228                                         //total calc
229                                         if (pcountSize == 0) { }
230                                         else if ((t->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root 
231                                                 totalBL += abs(t->tree[i].getBranchLength()); 
232                                         }
233                                 }
234         //cout << UniqueBL << '\t' << totalBL << endl;          
235                                 UW = (UniqueBL / totalBL);  
236         
237                                 if (isnan(UW) || isinf(UW)) { UW = 0; }
238         
239                                 results[count] = UW;
240                         }
241                         count++;
242
243                         //report progress
244                         //if((count % twentyPercent) == 0) {    float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine();       }
245                 }
246                 
247                 //report progress
248                 //if((count % twentyPercent) != 0) {    float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine();       }
249                 
250                 return results; 
251         }
252         catch(exception& e) {
253                 m->errorOut(e, "Unweighted", "driver");
254                 exit(1);
255         }
256 }
257 /**************************************************************************************************/
258
259 EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, string o) { 
260  try {
261                 processors = p;
262                 outputDir = o;
263                 
264         CountTable* ct = t->getCountTable();
265      
266                 //if the users enters no groups then give them the score of all groups
267                 int numGroups = m->getNumGroups();
268                 
269                 //calculate number of comparsions
270                 int numComp = 0;
271                 vector< vector<string> > namesOfGroupCombos;
272                 for (int r=0; r<numGroups; r++) { 
273                         for (int l = 0; l < r; l++) {
274                                 numComp++;
275                                 vector<string> groups; groups.push_back((m->getGroups())[r]); groups.push_back((m->getGroups())[l]);
276                                 namesOfGroupCombos.push_back(groups);
277                         }
278                 }
279                 
280                 if (numComp != 1) {
281                         vector<string> groups;
282                         if (numGroups == 0) {
283                                 //get score for all users groups
284                                 for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) {
285                                         if ((ct->getNamesOfGroups())[i] != "xxx") {
286                                                 groups.push_back((ct->getNamesOfGroups())[i]);
287                                         }
288                                 }
289                                 namesOfGroupCombos.push_back(groups);
290                         }else {
291                                 for (int i = 0; i < m->getNumGroups(); i++) {
292                                         groups.push_back((m->getGroups())[i]);
293                                 }
294                                 namesOfGroupCombos.push_back(groups);
295                         }
296                 }
297
298                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
299                         if(processors == 1){
300                                 data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true, ct);
301                         }else{
302                                 int numPairs = namesOfGroupCombos.size();
303                                 
304                                 int numPairsPerProcessor = numPairs / processors;
305                                 
306                                 for (int i = 0; i < processors; i++) {
307                                         int startPos = i * numPairsPerProcessor;
308                                         if(i == processors - 1){
309                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
310                                         }
311                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
312                                 }
313                                         
314                                 data = createProcesses(t, namesOfGroupCombos, true, ct);
315                                 
316                                 lines.clear();
317                         }
318                 #else
319                         data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true, ct);
320                 #endif
321         
322                 return data;
323         }
324         catch(exception& e) {
325                 m->errorOut(e, "Unweighted", "getValues");
326                 exit(1);
327         }
328 }
329 /**************************************************************************************************/
330
331 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, bool usingGroups, CountTable* ct) {
332         try {
333 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
334                 int process = 1;
335                 vector<int> processIDS;
336                 
337                 EstOutput results;
338                 
339                 //loop through and create all the processes you want
340                 while (process != processors) {
341                         int pid = fork();
342                         
343                         if (pid > 0) {
344                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
345                                 process++;
346                         }else if (pid == 0){
347                                 EstOutput myresults;
348                                 myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, usingGroups, ct);
349                                 
350                                 if (m->control_pressed) { exit(0); }
351                                 
352                                 //pass numSeqs to parent
353                                 ofstream out;
354                                 string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
355                                 m->openOutputFile(tempFile, out);
356                                 out << myresults.size() << endl;
357                                 for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
358                                 out.close();
359                                 
360                                 exit(0);
361                         }else { 
362                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
363                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
364                                 exit(0); 
365                         }
366                 }
367                 
368                 results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups, ct);
369                 
370                 //force parent to wait until all the processes are done
371                 for (int i=0;i<(processors-1);i++) { 
372                         int temp = processIDS[i];
373                         wait(&temp);
374                 }
375                 
376                 if (m->control_pressed) { return results; }
377                 
378                 //get data created by processes
379                 for (int i=0;i<(processors-1);i++) { 
380                         ifstream in;
381                         string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
382                         m->openInputFile(s, in);
383                         
384                         //get quantiles
385                         if (!in.eof()) {
386                                 int num;
387                                 in >> num; m->gobble(in);
388                                         
389                                 if (m->control_pressed) { break; }
390                                 
391                                 double w; 
392                                 for (int j = 0; j < num; j++) {
393                                         in >> w;
394                                         
395                                         results.push_back(w);
396                                 }
397                                 m->gobble(in);
398                         }
399                         in.close();
400                         m->mothurRemove(s);
401                 }
402                 
403                 return results;
404 #endif          
405         }
406         catch(exception& e) {
407                 m->errorOut(e, "Unweighted", "createProcesses");
408                 exit(1);
409         }
410 }
411 /**************************************************************************************************/
412 EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, bool usingGroups, CountTable* ct) { 
413  try {
414                 
415                 EstOutput results; results.resize(num);
416                 
417                 int count = 0;
418                 
419                 Tree* copyTree = new Tree(ct);
420                 
421                 for (int h = start; h < (start+num); h++) {
422                 
423                         if (m->control_pressed) { return results; }
424                 
425                         //copy random tree passed in
426                         copyTree->getCopy(t);
427                                 
428                         //swap labels in the groups you want to compare
429                         copyTree->assembleRandomUnifracTree(namesOfGroupCombos[h]);
430                         
431                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
432                         double totalBL = 0.00;  //all branch lengths
433                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
434                         //find a node that belongs to one of the groups in this combo
435                         int nodeBelonging = -1;
436                         for (int g = 0; g < namesOfGroupCombos[h].size(); g++) {
437                                 if (copyTree->groupNodeInfo[namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = copyTree->groupNodeInfo[namesOfGroupCombos[h][g]][0]; break; }
438                         }
439                         
440                         //sanity check
441                         if (nodeBelonging == -1) {
442                                 m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping "); 
443                                 for (int g = 0; g < namesOfGroupCombos[h].size()-1; g++) { m->mothurOut(namesOfGroupCombos[h][g] + "-"); }
444                                 m->mothurOut(namesOfGroupCombos[h][namesOfGroupCombos[h].size()-1]);
445                                 m->mothurOut(", skipping."); m->mothurOutEndLine(); results[count] = UW;
446                         }else{
447                                 
448                                 //if including the root this clears rootForGrouping[namesOfGroupCombos[h]]
449                                 getRoot(copyTree, nodeBelonging, namesOfGroupCombos[h]);
450                                 
451                                 for(int i=0;i<copyTree->getNumNodes();i++){
452                                         
453                                         if (m->control_pressed) {  return data; }
454                                         
455                                         //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
456                                         //pcountSize = 2, not unique to one group
457                                         //pcountSize = 1, unique to one group
458                                         
459                                         int pcountSize = 0;
460                                         for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
461                                                 map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(namesOfGroupCombos[h][j]);
462                                                 if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
463                                         }
464                                         
465                                         //unique calc
466                                         if (pcountSize == 0) { }
467                                         else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root 
468                                                 UniqueBL += abs(copyTree->tree[i].getBranchLength()); 
469                                         }
470                                         
471                                         //total calc
472                                         if (pcountSize == 0) { }
473                                         else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root 
474                                                 totalBL += abs(copyTree->tree[i].getBranchLength()); 
475                                         }
476                                         
477                                 }
478                                 //cout << UniqueBL << '\t' << totalBL << endl;          
479                                 UW = (UniqueBL / totalBL);  
480                                 
481                                 if (isnan(UW) || isinf(UW)) { UW = 0; }
482                                 
483                                 results[count] = UW;
484                         }
485                         count++;
486                         
487                 }
488                 
489                 delete copyTree;
490                 
491                 return results; 
492         }
493         catch(exception& e) {
494                 m->errorOut(e, "Unweighted", "driver");
495                 exit(1);
496         }
497 }
498 /**************************************************************************************************/
499 int Unweighted::getRoot(Tree* t, int v, vector<string> grouping) { 
500         try {
501                 //you are a leaf so get your parent
502                 int index = t->tree[v].getParent();
503                 
504                 if (includeRoot) { 
505                         rootForGrouping[grouping].clear();
506                 }else {
507                         
508                         //my parent is a potential root
509                         rootForGrouping[grouping].insert(index);
510                         
511                         //while you aren't at root
512                         while(t->tree[index].getParent() != -1){
513                                 //cout << index << endl;        
514                                 if (m->control_pressed) {  return 0; }
515                                 
516                                 //am I the root for this grouping? if so I want to stop "early"
517                                 //does my sibling have descendants from the users groups? 
518                                 //if so I am not the root
519                                 int parent = t->tree[index].getParent();
520                                 int lc = t->tree[parent].getLChild();
521                                 int rc = t->tree[parent].getRChild();
522                                 
523                                 int sib = lc;
524                                 if (lc == index) { sib = rc; }
525                                 
526                                 map<string, int>::iterator itGroup;
527                                 int pcountSize = 0;
528                                 for (int j = 0; j < grouping.size(); j++) {
529                                         map<string, int>::iterator itGroup = t->tree[sib].pcount.find(grouping[j]);
530                                         if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
531                                 }
532                                 
533                                 //if yes, I am not the root
534                                 if (pcountSize != 0) {
535                                         rootForGrouping[grouping].clear();
536                                         rootForGrouping[grouping].insert(parent);
537                                 }
538                                 
539                                 index = parent; 
540                         }
541                         
542                         //get all nodes above the root to add so we don't add their u values above
543                         index = *(rootForGrouping[grouping].begin());
544                         while(t->tree[index].getParent() != -1){
545                                 int parent = t->tree[index].getParent();
546                                 rootForGrouping[grouping].insert(parent);
547                                 //cout << parent << " in root" << endl;
548                                 index = parent;
549                         }
550                 }
551                 
552                 return 0;
553         }
554         catch(exception& e) {
555                 m->errorOut(e, "Unweighted", "getRoot");
556                 exit(1);
557         }
558 }
559 /**************************************************************************************************/
560
561