]> git.donarmstrong.com Git - mothur.git/blob - unweighted.h
changed random forest output filename
[mothur.git] / unweighted.h
1 #ifndef UNWEIGHTED_H
2 #define UNWEIGHTED_H
3
4
5 /*
6  *  unweighted.h
7  *  Mothur
8  *
9  *  Created by Sarah Westcott on 2/9/09.
10  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
11  *
12  */
13
14 #include "treecalculator.h"
15 #include "counttable.h"
16
17 /***********************************************************************/
18
19 class Unweighted : public TreeCalculator  {
20         
21         public:
22         Unweighted(bool r) : includeRoot(r) {};
23                 ~Unweighted() {};
24                 EstOutput getValues(Tree*, int, string);
25                 EstOutput getValues(Tree*, string, string, int, string);
26                 
27         private:
28                 struct linePair {
29                         int start;
30                         int num;
31                         linePair(int i, int j) : start(i), num(j) {}
32                 };
33                 vector<linePair> lines;
34                 
35                 EstOutput data;
36                 int processors;
37                 string outputDir;
38                 map< vector<string>, set<int> > rootForGrouping;  //maps a grouping combo to the roots for that combo
39                 bool includeRoot;
40                 
41                 EstOutput driver(Tree*, vector< vector<string> >, int, int, CountTable*); 
42                 EstOutput createProcesses(Tree*, vector< vector<string> >, CountTable*);
43                 EstOutput driver(Tree*, vector< vector<string> >, int, int, bool, CountTable*); 
44                 EstOutput createProcesses(Tree*, vector< vector<string> >, bool, CountTable*);
45                 int getRoot(Tree*, int, vector<string>);
46 };
47
48 /***********************************************************************/
49 struct unweightedData {
50     int start;
51         int num;
52         MothurOut* m;
53     EstOutput results;
54     vector< vector<string> > namesOfGroupCombos;
55     Tree* t;
56     CountTable* ct;
57     bool includeRoot;
58     
59         unweightedData(){}
60         unweightedData(MothurOut* mout, int st, int en, vector< vector<string> > ngc, Tree* tree, CountTable* count, bool ir) {
61         m = mout;
62                 start = st;
63                 num = en;
64         namesOfGroupCombos = ngc;
65         t = tree;
66         ct = count;
67         includeRoot = ir;
68         }
69 };
70
71 /**************************************************************************************************/
72 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
73 #else
74 static DWORD WINAPI MyUnWeightedThreadFunction(LPVOID lpParam){
75         unweightedData* pDataArray;
76         pDataArray = (unweightedData*)lpParam;
77         try {
78         pDataArray->results.resize(pDataArray->num);
79         map< vector<string>, set<int> > rootForGrouping;
80                 
81                 int count = 0;
82                         
83                 for (int h = pDataArray->start; h < (pDataArray->start+pDataArray->num); h++) {
84                         if (pDataArray->m->control_pressed) { return 0; }
85             
86                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
87                         double totalBL = 0.00;  //all branch lengths
88                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
89             
90                         //find a node that belongs to one of the groups in this combo
91                         int nodeBelonging = -1;
92                         for (int g = 0; g < pDataArray->namesOfGroupCombos[h].size(); g++) {
93                                 if (pDataArray->t->groupNodeInfo[pDataArray->namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = pDataArray->t->groupNodeInfo[pDataArray->namesOfGroupCombos[h][g]][0]; break; }
94                         }
95                         
96                         //sanity check
97                         if (nodeBelonging == -1) {
98                                 pDataArray->m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping ");
99                                 for (int g = 0; g < pDataArray->namesOfGroupCombos[h].size()-1; g++) { pDataArray->m->mothurOut(pDataArray->namesOfGroupCombos[h][g] + "-"); }
100                                 pDataArray->m->mothurOut(pDataArray->namesOfGroupCombos[h][pDataArray->namesOfGroupCombos[h].size()-1]);
101                                 pDataArray->m->mothurOut(", skipping."); pDataArray->m->mothurOutEndLine(); pDataArray->results[count] = UW;
102                         }else{
103                                 
104                                 //if including the root this clears rootForGrouping[namesOfGroupCombos[h]]
105                                 //getRoot(t, nodeBelonging, namesOfGroupCombos[h]);
106                                 /////////////////////////////////////////////////////////////////////////////
107                 //you are a leaf so get your parent
108                 vector<string> grouping = pDataArray->namesOfGroupCombos[h];
109                 int index = pDataArray->t->tree[nodeBelonging].getParent();
110                 
111                 if (pDataArray->includeRoot) {
112                     rootForGrouping[grouping].clear();
113                 }else {
114                     
115                     //my parent is a potential root
116                     rootForGrouping[grouping].insert(index);
117                     
118                     //while you aren't at root
119                     while(pDataArray->t->tree[index].getParent() != -1){
120                         //cout << index << endl;
121                         if (pDataArray->m->control_pressed) {  return 0; }
122                         
123                         //am I the root for this grouping? if so I want to stop "early"
124                         //does my sibling have descendants from the users groups?
125                         //if so I am not the root
126                         int parent = pDataArray->t->tree[index].getParent();
127                         int lc = pDataArray->t->tree[parent].getLChild();
128                         int rc = pDataArray->t->tree[parent].getRChild();
129                         
130                         int sib = lc;
131                         if (lc == index) { sib = rc; }
132                         
133                         map<string, int>::iterator itGroup;
134                         int pcountSize = 0;
135                         for (int j = 0; j < grouping.size(); j++) {
136                             map<string, int>::iterator itGroup = pDataArray->t->tree[sib].pcount.find(grouping[j]);
137                             if (itGroup != pDataArray->t->tree[sib].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } }
138                         }
139                         
140                         //if yes, I am not the root
141                         if (pcountSize != 0) {
142                             rootForGrouping[grouping].clear();
143                             rootForGrouping[grouping].insert(parent);
144                         }
145                         
146                         index = parent; 
147                     }
148                     
149                     //get all nodes above the root to add so we don't add their u values above
150                     index = *(rootForGrouping[grouping].begin());
151                     while(pDataArray->t->tree[index].getParent() != -1){
152                         int parent = pDataArray->t->tree[index].getParent();
153                         rootForGrouping[grouping].insert(parent);
154                         //cout << parent << " in root" << endl;
155                         index = parent;
156                     }
157                 }
158                 /////////////////////////////////////////////////////////////////////////////
159                 
160                                 for(int i=0;i<pDataArray->t->getNumNodes();i++){
161                                         
162                                         if (pDataArray->m->control_pressed) {  return 0; }
163                                         //cout << i << endl;
164                                         //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
165                                         //pcountSize = 2, not unique to one group
166                                         //pcountSize = 1, unique to one group
167                                         
168                                         int pcountSize = 0;
169                                         for (int j = 0; j < pDataArray->namesOfGroupCombos[h].size(); j++) {
170                                                 map<string, int>::iterator itGroup = pDataArray->t->tree[i].pcount.find(pDataArray->namesOfGroupCombos[h][j]);
171                                                 if (itGroup != pDataArray->t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } }
172                                         }
173                                         
174                                         
175                                         //unique calc
176                                         if (pcountSize == 0) { }
177                                         else if ((pDataArray->t->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[pDataArray->namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root
178                                                 UniqueBL += abs(pDataArray->t->tree[i].getBranchLength());
179                                         }
180                     
181                                         //total calc
182                                         if (pcountSize == 0) { }
183                                         else if ((pDataArray->t->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[pDataArray->namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root
184                                                 totalBL += abs(pDataArray->t->tree[i].getBranchLength());
185                                         }
186                                 }
187                 //cout << UniqueBL << '\t' << totalBL << endl;
188                                 UW = (UniqueBL / totalBL);
189                 
190                                 if (isnan(UW) || isinf(UW)) { UW = 0; }
191                 
192                                 pDataArray->results[count] = UW;
193                         }
194                         count++;
195                 }
196                 
197                 return 0;
198         
199     }
200         catch(exception& e) {
201                 pDataArray->m->errorOut(e, "UnWeighted", "MyUnWeightedThreadFunction");
202                 exit(1);
203         }
204 }
205 /**************************************************************************************************/
206
207 static DWORD WINAPI MyUnWeightedRandomThreadFunction(LPVOID lpParam){
208         unweightedData* pDataArray;
209         pDataArray = (unweightedData*)lpParam;
210         try {
211         pDataArray->results.resize(pDataArray->num);
212                 
213                 int count = 0;
214                 
215                 Tree* copyTree = new Tree(pDataArray->ct);
216                 
217                 for (int h = pDataArray->start; h < (pDataArray->start+pDataArray->num); h++) {
218             
219                         if (pDataArray->m->control_pressed) { return 0; }
220             
221             map< vector<string>, set<int> > rootForGrouping;
222             
223                         //copy random tree passed in
224                         copyTree->getCopy(pDataArray->t);
225             
226                         //swap labels in the groups you want to compare
227                         copyTree->assembleRandomUnifracTree(pDataArray->namesOfGroupCombos[h]);
228                         
229                         double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
230                         double totalBL = 0.00;  //all branch lengths
231                         double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
232                         //find a node that belongs to one of the groups in this combo
233                         int nodeBelonging = -1;
234                         for (int g = 0; g < pDataArray->namesOfGroupCombos[h].size(); g++) {
235                                 if (copyTree->groupNodeInfo[pDataArray->namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = copyTree->groupNodeInfo[pDataArray->namesOfGroupCombos[h][g]][0]; break; }
236                         }
237                         
238                         //sanity check
239                         if (nodeBelonging == -1) {
240                                 pDataArray->m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping ");
241                                 for (int g = 0; g < pDataArray->namesOfGroupCombos[h].size()-1; g++) { pDataArray->m->mothurOut(pDataArray->namesOfGroupCombos[h][g] + "-"); }
242                                 pDataArray->m->mothurOut(pDataArray->namesOfGroupCombos[h][pDataArray->namesOfGroupCombos[h].size()-1]);
243                                 pDataArray->m->mothurOut(", skipping."); pDataArray->m->mothurOutEndLine(); pDataArray->results[count] = UW;
244                         }else{
245                                 
246                                 //if including the root this clears rootForGrouping[namesOfGroupCombos[h]]
247                                 //getRoot(copyTree, nodeBelonging, namesOfGroupCombos[h]);
248                 /////////////////////////////////////////////////////////////////////////////
249                 //you are a leaf so get your parent
250                 vector<string> grouping = pDataArray->namesOfGroupCombos[h];
251                 int index = copyTree->tree[nodeBelonging].getParent();
252                 
253                 if (pDataArray->includeRoot) {
254                     rootForGrouping[grouping].clear();
255                 }else {
256                     
257                     //my parent is a potential root
258                     rootForGrouping[grouping].insert(index);
259                     
260                     //while you aren't at root
261                     while(copyTree->tree[index].getParent() != -1){
262                         //cout << index << endl;
263                         if (pDataArray->m->control_pressed) {  return 0; }
264                         
265                         //am I the root for this grouping? if so I want to stop "early"
266                         //does my sibling have descendants from the users groups?
267                         //if so I am not the root
268                         int parent = copyTree->tree[index].getParent();
269                         int lc = copyTree->tree[parent].getLChild();
270                         int rc = copyTree->tree[parent].getRChild();
271                         
272                         int sib = lc;
273                         if (lc == index) { sib = rc; }
274                         
275                         map<string, int>::iterator itGroup;
276                         int pcountSize = 0;
277                         for (int j = 0; j < grouping.size(); j++) {
278                             map<string, int>::iterator itGroup = copyTree->tree[sib].pcount.find(grouping[j]);
279                             if (itGroup != copyTree->tree[sib].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } }
280                         }
281                         
282                         //if yes, I am not the root
283                         if (pcountSize != 0) {
284                             rootForGrouping[grouping].clear();
285                             rootForGrouping[grouping].insert(parent);
286                         }
287                         
288                         index = parent;
289                     }
290                     
291                     //get all nodes above the root to add so we don't add their u values above
292                     index = *(rootForGrouping[grouping].begin());
293                     while(copyTree->tree[index].getParent() != -1){
294                         int parent = copyTree->tree[index].getParent();
295                         rootForGrouping[grouping].insert(parent);
296                         //cout << parent << " in root" << endl;
297                         index = parent;
298                     }
299                 }
300                 /////////////////////////////////////////////////////////////////////////////
301                                 for(int i=0;i<copyTree->getNumNodes();i++){
302                                         
303                                         if (pDataArray->m->control_pressed) {  return 0; }
304                                         
305                                         //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
306                                         //pcountSize = 2, not unique to one group
307                                         //pcountSize = 1, unique to one group
308                     int pcountSize = 0;
309                                         for (int j = 0; j < pDataArray->namesOfGroupCombos[h].size(); j++) {
310                                                 map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(pDataArray->namesOfGroupCombos[h][j]);
311                                                 if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } }
312                                         }
313                                         
314                                         //unique calc
315                                         if (pcountSize == 0) { }
316                                         else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[pDataArray->namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root
317                                                 UniqueBL += abs(copyTree->tree[i].getBranchLength());
318                                         }
319                                         
320                                         //total calc
321                                         if (pcountSize == 0) { }
322                                         else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[pDataArray->namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root
323                                                 totalBL += abs(copyTree->tree[i].getBranchLength());
324                                         }
325                                         
326                                 }
327                                 cout << h << '\t' << UniqueBL << '\t' << totalBL << endl;
328                                 UW = (UniqueBL / totalBL);
329                                 
330                                 if (isnan(UW) || isinf(UW)) { UW = 0; }
331                                 
332                                 pDataArray->results[count] = UW;
333                 cout << h << '\t' << UW << endl;
334                         }
335                         count++;
336                         
337                 }
338                 
339                 delete copyTree;
340                 
341                 return 0;
342     }
343         catch(exception& e) {
344                 pDataArray->m->errorOut(e, "UnWeighted", "MyUnWeightedRandomThreadFunction");
345                 exit(1);
346         }
347 }
348
349 #endif
350
351
352 #endif