]> git.donarmstrong.com Git - mothur.git/blob - hcluster.cpp
added groups option to read.otu, added qtrim option to trim.seqs, fixed bug in get...
[mothur.git] / hcluster.cpp
1 /*
2  *  hcluster.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/13/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "hcluster.h"
11 #include "rabundvector.hpp"
12 #include "listvector.hpp"
13 #include "sparsematrix.hpp"
14
15 /***********************************************************************/
16
17 HCluster::HCluster(RAbundVector* rav, ListVector* lv) :  rabund(rav), list(lv){
18         try {
19         
20                 numSeqs = list->getNumSeqs();
21                 
22                 //initialize cluster array
23                 for (int i = 0; i < numSeqs; i++) {
24                         clusterNode temp(1, -1, i);
25                         clusterArray.push_back(temp);
26                 }
27                 
28         }
29         catch(exception& e) {
30                 errorOut(e, "HCluster", "HCluster");
31                 exit(1);
32         }
33 }
34 /***********************************************************************/
35
36 void HCluster::clusterBins(){
37         try {
38                 //cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(clusterArray[smallRow].smallChild) << '\t' << rabund->get(clusterArray[smallCol].smallChild);
39
40                 rabund->set(clusterArray[smallCol].smallChild, rabund->get(clusterArray[smallRow].smallChild)+rabund->get(clusterArray[smallCol].smallChild));  
41                 rabund->set(clusterArray[smallRow].smallChild, 0);      
42                 rabund->setLabel(toString(smallDist));
43
44                 //cout << '\t' << rabund->get(clusterArray[smallRow].smallChild) << '\t' << rabund->get(clusterArray[smallCol].smallChild) << endl;
45         }
46         catch(exception& e) {
47                 errorOut(e, "HCluster", "clusterBins");
48                 exit(1);
49         }
50
51
52 }
53
54 /***********************************************************************/
55
56 void HCluster::clusterNames(){
57         try {
58                 ///cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(clusterArray[smallRow].smallChild) << '\t' << list->get(clusterArray[smallCol].smallChild);
59
60                 list->set(clusterArray[smallCol].smallChild, list->get(clusterArray[smallRow].smallChild)+','+list->get(clusterArray[smallCol].smallChild));
61                 list->set(clusterArray[smallRow].smallChild, "");       
62                 list->setLabel(toString(smallDist));
63         
64                 //cout << '\t' << list->get(clusterArray[smallRow].smallChild) << '\t' << list->get(clusterArray[smallCol].smallChild) << endl;
65
66     }
67         catch(exception& e) {
68                 errorOut(e, "HCluster", "clusterNames");
69                 exit(1);
70         }
71
72 }
73 /***********************************************************************/
74 int HCluster::getUpmostParent(int node){
75         try {
76                 
77                 while (clusterArray[node].parent != -1) {
78                         node = clusterArray[node].parent;
79                 }
80                 
81                 return node;
82         }
83         catch(exception& e) {
84                 errorOut(e, "HCluster", "getUpmostParent");
85                 exit(1);
86         }
87 }
88 /***********************************************************************/
89 void HCluster::printInfo(){
90         try {
91                 
92                 cout << "link table" << endl;
93                 for (it = activeLinks.begin(); it!= activeLinks.end(); it++) {
94                         cout << it->first << " = " << it->second << endl;
95                 }
96                 cout << endl;
97                 for (int i = 0; i < linkTable.size(); i++) {
98                         cout << i << '\t';
99                         for (it = linkTable[i].begin(); it != linkTable[i].end(); it++) {
100                                 cout << it->first << '-' << it->second << '\t';
101                         }
102                         cout << endl;
103                 }
104                 cout << endl << "clusterArray" << endl;
105                 
106                 for (int i = 0; i < clusterArray.size(); i++) {
107                         cout << i << '\t' << clusterArray[i].numSeq << '\t' << clusterArray[i].parent << '\t' << clusterArray[i].smallChild << endl;
108                 }
109                 cout << endl;
110                 
111                 
112         }
113         catch(exception& e) {
114                 errorOut(e, "HCluster", "getUpmostParent");
115                 exit(1);
116         }
117 }
118 /***********************************************************************/
119 int HCluster::makeActive() {
120         try {
121         
122                 int linkValue = 1; 
123 //cout << "active - here" << endl;              
124                 it = activeLinks.find(smallRow);
125                 it2 = activeLinks.find(smallCol);
126                 
127                 if ((it == activeLinks.end()) && (it2 == activeLinks.end())) { //both are not active so add them
128                         int size = linkTable.size();
129                         map<int, int> temp; map<int, int> temp2;
130                         
131                         //add link to eachother
132                         temp[smallRow] = 1;                                                     //         1    2
133                         temp2[smallCol] = 1;                                            // 1   0        1
134                                                                                                                 // 2   1        0
135                         linkTable.push_back(temp);
136                         linkTable.push_back(temp2);
137                         
138                         //add to activeLinks
139                         activeLinks[smallRow] = size;
140                         activeLinks[smallCol] = size+1;
141 //cout << "active - here1" << endl;
142                 }else if ((it != activeLinks.end()) && (it2 == activeLinks.end())) {  //smallRow is active, smallCol is not
143                          int size = linkTable.size();
144                          int alreadyActiveRow = it->second;
145                          map<int, int> temp; 
146                         
147                         //add link to eachother
148                         temp[smallRow] = 1;                                                     //         6    2       3       5
149                         linkTable.push_back(temp);                                      // 6   0        1       2       0
150                         linkTable[alreadyActiveRow][smallCol] = 1;      // 2   1        0       1       1
151                                                                                                                 // 3   2        1       0       0
152                                                                                                                 // 5   0    1   0   0   
153                         //add to activeLinks
154                         activeLinks[smallCol] = size;
155 //cout << "active - here2" << endl;                     
156                 }else if ((it == activeLinks.end()) && (it2 != activeLinks.end())) {  //smallCol is active, smallRow is not
157                          int size = linkTable.size();
158                          int alreadyActiveCol = it2->second;
159                          map<int, int> temp; 
160                         
161                         //add link to eachother
162                         temp[smallCol] = 1;                                                     //         6    2       3       5
163                         linkTable.push_back(temp);                                      // 6   0        1       2       0
164                         linkTable[alreadyActiveCol][smallRow] = 1;      // 2   1        0       1       1
165                                                                                                                 // 3   2        1       0       0
166                                                                                                                 // 5   0    1   0   0   
167                         //add to activeLinks
168                         activeLinks[smallRow] = size;
169 //cout << "active - here3" << endl;
170                 }else { //both are active so add one
171                         int row = it->second;
172                         int col = it2->second;
173 //cout << row << '\t' << col << endl;                   
174                         
175                         linkTable[row][smallCol]++;
176                         linkTable[col][smallRow]++;
177                         linkValue = linkTable[row][smallCol];
178 //cout << "active - here4" << endl;
179                 }
180                 
181                 return linkValue;
182         }
183         catch(exception& e) {
184                 errorOut(e, "HCluster", "makeActive");
185                 exit(1);
186         }
187 }
188 /***********************************************************************/
189 void HCluster::updateArrayandLinkTable() {
190         try {
191                 //if cluster was made update clusterArray and linkTable
192                         int size = clusterArray.size();
193                         
194                         //add new node
195                         clusterNode temp(clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq, -1, clusterArray[smallCol].smallChild);
196                         clusterArray.push_back(temp);
197                         
198                         //update child nodes
199                         clusterArray[smallRow].parent = size;
200                         clusterArray[smallCol].parent = size;
201                         
202                         //update linkTable by merging clustered rows and columns
203                         int rowSpot = activeLinks[smallRow];
204                         int colSpot = activeLinks[smallCol];
205         //cout << "here" << endl;               
206                         //fix old rows
207                         for (int i = 0; i < linkTable.size(); i++) {
208                                 //check if they are in map
209                                 it = linkTable[i].find(smallRow);
210                                 it2 = linkTable[i].find(smallCol);
211                                 
212                                 if ((it!=linkTable[i].end()) && (it2!=linkTable[i].end())) { //they are both there
213                                         linkTable[i][size] = linkTable[i][smallRow]+linkTable[i][smallCol];
214                                         linkTable[i].erase(smallCol); //delete col row
215                                         linkTable[i].erase(smallRow); //delete col row
216                                 }else if ((it==linkTable[i].end()) && (it2!=linkTable[i].end())) { //only col
217                                         linkTable[i][size] = linkTable[i][smallCol];
218                                         linkTable[i].erase(smallCol); //delete col 
219                                 }else if ((it!=linkTable[i].end()) && (it2==linkTable[i].end())) { //only row
220                                         linkTable[i][size] = linkTable[i][smallRow];
221                                         linkTable[i].erase(smallRow); //delete col 
222                                 }
223                         }
224         //printInfo();
225 //cout << "here2" << endl;
226                         //merge their values
227                         for (it = linkTable[rowSpot].begin(); it != linkTable[rowSpot].end(); it++) {
228                                 it2 = linkTable[colSpot].find(it->first);  //does the col also have this
229                                 
230                                 if (it2 == linkTable[colSpot].end()) { //not there so add it
231                                         linkTable[colSpot][it->first] = it->second;
232                                 }else { //merge them
233                                         linkTable[colSpot][it->first] = it->second+it2->second;
234                                 }
235                         }
236 //cout << "here3" << endl;                      
237                         linkTable[colSpot].erase(size);
238                         //linkTable.erase(linkTable.begin()+rowSpot);  //delete row
239         //printInfo();          
240                         //update activerows
241                         activeLinks.erase(smallRow);
242                         activeLinks.erase(smallCol);
243                         activeLinks[size] = colSpot;
244                         
245                         //adjust everybody elses spot since you deleted - time vs. space
246                         //for (it = activeLinks.begin(); it != activeLinks.end(); it++) {
247                                 //if (it->second > rowSpot) {  activeLinks[it->first]--;        }
248                         //}
249                         
250 //cout << "here4" << endl;
251         
252         }
253         catch(exception& e) {
254                 errorOut(e, "HCluster", "updateArrayandLinkTable");
255                 exit(1);
256         }
257 }
258 /***********************************************************************/
259 bool HCluster::update(int row, int col, float distance){
260         try {
261                 
262                 smallRow = row;
263                 smallCol = col;
264                 smallDist = distance;
265                 bool clustered = false;
266                 
267                 //find upmost parent of row and col
268                 smallRow = getUpmostParent(smallRow);
269                 smallCol = getUpmostParent(smallCol);
270         //cout << "row = " << row << " smallRow = " << smallRow <<  " col = " << col << " smallCol = " << smallCol << " dist = " << distance << endl;
271                 
272                 //are they active in the link table
273                 int linkValue = makeActive(); //after this point this nodes info is active in linkTable
274         //printInfo();                  
275 //cout << "linkValue = " << linkValue << " times = " << (clusterArray[smallRow].numSeq * clusterArray[smallCol].numSeq) << endl;
276                 //can we cluster???
277                 if (linkValue == (clusterArray[smallRow].numSeq * clusterArray[smallCol].numSeq)) {
278                         //printInfo();
279                         updateArrayandLinkTable();
280                         clusterBins();
281                         clusterNames();
282                         clustered = true;
283                         //printInfo();
284                 }
285                 
286                 //printInfo();
287                 return clustered;
288         }
289         catch(exception& e) {
290                 errorOut(e, "HCluster", "update");
291                 exit(1);
292         }
293 }
294
295
296 /***********************************************************************/
297
298