]> git.donarmstrong.com Git - mothur.git/blob - hcluster.cpp
fixed some bugs and added mgcluster command
[mothur.git] / hcluster.cpp
1 /*
2  *  hcluster.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/13/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "hcluster.h"
11 #include "rabundvector.hpp"
12 #include "listvector.hpp"
13 #include "sparsematrix.hpp"
14
15 /***********************************************************************/
16
17 HCluster::HCluster(RAbundVector* rav, ListVector* lv) :  rabund(rav), list(lv){
18         try {
19                 mapWanted = false;
20                 numSeqs = list->getNumSeqs();
21                 
22                 //initialize cluster array
23                 for (int i = 0; i < numSeqs; i++) {
24                         clusterNode temp(1, -1, i);
25                         clusterArray.push_back(temp);
26                 }
27                 
28         }
29         catch(exception& e) {
30                 errorOut(e, "HCluster", "HCluster");
31                 exit(1);
32         }
33 }
34 /***********************************************************************/
35
36 void HCluster::clusterBins(){
37         try {
38                 //cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(clusterArray[smallRow].smallChild) << '\t' << rabund->get(clusterArray[smallCol].smallChild);
39
40                 rabund->set(clusterArray[smallCol].smallChild, rabund->get(clusterArray[smallRow].smallChild)+rabund->get(clusterArray[smallCol].smallChild));  
41                 rabund->set(clusterArray[smallRow].smallChild, 0);      
42                 rabund->setLabel(toString(smallDist));
43
44                 //cout << '\t' << rabund->get(clusterArray[smallRow].smallChild) << '\t' << rabund->get(clusterArray[smallCol].smallChild) << endl;
45         }
46         catch(exception& e) {
47                 errorOut(e, "HCluster", "clusterBins");
48                 exit(1);
49         }
50
51
52 }
53
54 /***********************************************************************/
55
56 void HCluster::clusterNames(){
57         try {
58                 ///cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(clusterArray[smallRow].smallChild) << '\t' << list->get(clusterArray[smallCol].smallChild);
59                 if (mapWanted) {  updateMap();  }
60                 
61                 list->set(clusterArray[smallCol].smallChild, list->get(clusterArray[smallRow].smallChild)+','+list->get(clusterArray[smallCol].smallChild));
62                 list->set(clusterArray[smallRow].smallChild, "");       
63                 list->setLabel(toString(smallDist));
64         
65                 //cout << '\t' << list->get(clusterArray[smallRow].smallChild) << '\t' << list->get(clusterArray[smallCol].smallChild) << endl;
66
67     }
68         catch(exception& e) {
69                 errorOut(e, "HCluster", "clusterNames");
70                 exit(1);
71         }
72
73 }
74 /***********************************************************************/
75 int HCluster::getUpmostParent(int node){
76         try {
77                 
78                 while (clusterArray[node].parent != -1) {
79                         node = clusterArray[node].parent;
80                 }
81                 
82                 return node;
83         }
84         catch(exception& e) {
85                 errorOut(e, "HCluster", "getUpmostParent");
86                 exit(1);
87         }
88 }
89 /***********************************************************************/
90 void HCluster::printInfo(){
91         try {
92                 
93                 cout << "link table" << endl;
94                 for (it = activeLinks.begin(); it!= activeLinks.end(); it++) {
95                         cout << it->first << " = " << it->second << endl;
96                 }
97                 cout << endl;
98                 for (int i = 0; i < linkTable.size(); i++) {
99                         cout << i << '\t';
100                         for (it = linkTable[i].begin(); it != linkTable[i].end(); it++) {
101                                 cout << it->first << '-' << it->second << '\t';
102                         }
103                         cout << endl;
104                 }
105                 cout << endl << "clusterArray" << endl;
106                 
107                 for (int i = 0; i < clusterArray.size(); i++) {
108                         cout << i << '\t' << clusterArray[i].numSeq << '\t' << clusterArray[i].parent << '\t' << clusterArray[i].smallChild << endl;
109                 }
110                 cout << endl;
111                 
112                 
113         }
114         catch(exception& e) {
115                 errorOut(e, "HCluster", "getUpmostParent");
116                 exit(1);
117         }
118 }
119 /***********************************************************************/
120 int HCluster::makeActive() {
121         try {
122         
123                 int linkValue = 1; 
124 //cout << "active - here" << endl;              
125                 it = activeLinks.find(smallRow);
126                 it2 = activeLinks.find(smallCol);
127                 
128                 if ((it == activeLinks.end()) && (it2 == activeLinks.end())) { //both are not active so add them
129                         int size = linkTable.size();
130                         map<int, int> temp; map<int, int> temp2;
131                         
132                         //add link to eachother
133                         temp[smallRow] = 1;                                                     //         1    2
134                         temp2[smallCol] = 1;                                            // 1   0        1
135                                                                                                                 // 2   1        0
136                         linkTable.push_back(temp);
137                         linkTable.push_back(temp2);
138                         
139                         //add to activeLinks
140                         activeLinks[smallRow] = size;
141                         activeLinks[smallCol] = size+1;
142 //cout << "active - here1" << endl;
143                 }else if ((it != activeLinks.end()) && (it2 == activeLinks.end())) {  //smallRow is active, smallCol is not
144                          int size = linkTable.size();
145                          int alreadyActiveRow = it->second;
146                          map<int, int> temp; 
147                         
148                         //add link to eachother
149                         temp[smallRow] = 1;                                                     //         6    2       3       5
150                         linkTable.push_back(temp);                                      // 6   0        1       2       0
151                         linkTable[alreadyActiveRow][smallCol] = 1;      // 2   1        0       1       1
152                                                                                                                 // 3   2        1       0       0
153                                                                                                                 // 5   0    1   0   0   
154                         //add to activeLinks
155                         activeLinks[smallCol] = size;
156 //cout << "active - here2" << endl;                     
157                 }else if ((it == activeLinks.end()) && (it2 != activeLinks.end())) {  //smallCol is active, smallRow is not
158                          int size = linkTable.size();
159                          int alreadyActiveCol = it2->second;
160                          map<int, int> temp; 
161                         
162                         //add link to eachother
163                         temp[smallCol] = 1;                                                     //         6    2       3       5
164                         linkTable.push_back(temp);                                      // 6   0        1       2       0
165                         linkTable[alreadyActiveCol][smallRow] = 1;      // 2   1        0       1       1
166                                                                                                                 // 3   2        1       0       0
167                                                                                                                 // 5   0    1   0   0   
168                         //add to activeLinks
169                         activeLinks[smallRow] = size;
170 //cout << "active - here3" << endl;
171                 }else { //both are active so add one
172                         int row = it->second;
173                         int col = it2->second;
174 //cout << row << '\t' << col << endl;                   
175                         
176                         linkTable[row][smallCol]++;
177                         linkTable[col][smallRow]++;
178                         linkValue = linkTable[row][smallCol];
179 //cout << "active - here4" << endl;
180                 }
181                 
182                 return linkValue;
183         }
184         catch(exception& e) {
185                 errorOut(e, "HCluster", "makeActive");
186                 exit(1);
187         }
188 }
189 /***********************************************************************/
190 void HCluster::updateArrayandLinkTable() {
191         try {
192                 //if cluster was made update clusterArray and linkTable
193                         int size = clusterArray.size();
194                         
195                         //add new node
196                         clusterNode temp(clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq, -1, clusterArray[smallCol].smallChild);
197                         clusterArray.push_back(temp);
198                         
199                         //update child nodes
200                         clusterArray[smallRow].parent = size;
201                         clusterArray[smallCol].parent = size;
202                         
203                         //update linkTable by merging clustered rows and columns
204                         int rowSpot = activeLinks[smallRow];
205                         int colSpot = activeLinks[smallCol];
206         //cout << "here" << endl;               
207                         //fix old rows
208                         for (int i = 0; i < linkTable.size(); i++) {
209                                 //check if they are in map
210                                 it = linkTable[i].find(smallRow);
211                                 it2 = linkTable[i].find(smallCol);
212                                 
213                                 if ((it!=linkTable[i].end()) && (it2!=linkTable[i].end())) { //they are both there
214                                         linkTable[i][size] = linkTable[i][smallRow]+linkTable[i][smallCol];
215                                         linkTable[i].erase(smallCol); //delete col row
216                                         linkTable[i].erase(smallRow); //delete col row
217                                 }else if ((it==linkTable[i].end()) && (it2!=linkTable[i].end())) { //only col
218                                         linkTable[i][size] = linkTable[i][smallCol];
219                                         linkTable[i].erase(smallCol); //delete col 
220                                 }else if ((it!=linkTable[i].end()) && (it2==linkTable[i].end())) { //only row
221                                         linkTable[i][size] = linkTable[i][smallRow];
222                                         linkTable[i].erase(smallRow); //delete col 
223                                 }
224                         }
225         //printInfo();
226 //cout << "here2" << endl;
227                         //merge their values
228                         for (it = linkTable[rowSpot].begin(); it != linkTable[rowSpot].end(); it++) {
229                                 it2 = linkTable[colSpot].find(it->first);  //does the col also have this
230                                 
231                                 if (it2 == linkTable[colSpot].end()) { //not there so add it
232                                         linkTable[colSpot][it->first] = it->second;
233                                 }else { //merge them
234                                         linkTable[colSpot][it->first] = it->second+it2->second;
235                                 }
236                         }
237 //cout << "here3" << endl;                      
238                         linkTable[colSpot].erase(size);
239                         linkTable.erase(linkTable.begin()+rowSpot);  //delete row
240         //printInfo();          
241                         //update activerows
242                         activeLinks.erase(smallRow);
243                         activeLinks.erase(smallCol);
244                         activeLinks[size] = colSpot;
245                         
246                         //adjust everybody elses spot since you deleted - time vs. space
247                         for (it = activeLinks.begin(); it != activeLinks.end(); it++) {
248                                 if (it->second > rowSpot) {  activeLinks[it->first]--;  }
249                         }
250                         
251 //cout << "here4" << endl;
252         
253         }
254         catch(exception& e) {
255                 errorOut(e, "HCluster", "updateArrayandLinkTable");
256                 exit(1);
257         }
258 }
259 /***********************************************************************/
260 void HCluster::update(int row, int col, float distance){
261         try {
262                 
263                 smallRow = row;
264                 smallCol = col;
265                 smallDist = distance;
266                 bool clustered = false;
267                 
268                 //find upmost parent of row and col
269                 smallRow = getUpmostParent(smallRow);
270                 smallCol = getUpmostParent(smallCol);
271         //cout << "row = " << row << " smallRow = " << smallRow <<  " col = " << col << " smallCol = " << smallCol << " dist = " << distance << endl;
272                 
273                 //are they active in the link table
274                 int linkValue = makeActive(); //after this point this nodes info is active in linkTable
275         //printInfo();                  
276 //cout << "linkValue = " << linkValue << " times = " << (clusterArray[smallRow].numSeq * clusterArray[smallCol].numSeq) << endl;
277                 //can we cluster???
278                 if (linkValue == (clusterArray[smallRow].numSeq * clusterArray[smallCol].numSeq)) {
279                         //printInfo();
280                         updateArrayandLinkTable();
281                         clusterBins();
282                         clusterNames();
283                         clustered = true;
284                         //printInfo();
285                 }
286                 
287                 //printInfo();
288         }
289         catch(exception& e) {
290                 errorOut(e, "HCluster", "update");
291                 exit(1);
292         }
293 }
294 /***********************************************************************/
295 void HCluster::setMapWanted(bool m)  {  
296         try {
297                 mapWanted = m;
298                 
299                 //initialize map
300                 for (int i = 0; i < list->getNumBins(); i++) {
301                         
302                         //parse bin 
303                         string names = list->get(i);
304                         while (names.find_first_of(',') != -1) { 
305                                 //get name from bin
306                                 string name = names.substr(0,names.find_first_of(','));
307                                 //save name and bin number
308                                 seq2Bin[name] = i;
309                                 names = names.substr(names.find_first_of(',')+1, names.length());
310                         }
311                         
312                         //get last name
313                         seq2Bin[names] = i;
314                 }
315                 
316         }
317         catch(exception& e) {
318                 errorOut(e, "HCluster", "setMapWanted");
319                 exit(1);
320         }
321 }
322 /***********************************************************************/
323 void HCluster::updateMap() {
324 try {
325                 //update location of seqs in smallRow since they move to smallCol now
326                 string names = list->get(clusterArray[smallRow].smallChild);
327                 while (names.find_first_of(',') != -1) { 
328                         //get name from bin
329                         string name = names.substr(0,names.find_first_of(','));
330                         //save name and bin number
331                         seq2Bin[name] = clusterArray[smallCol].smallChild;
332                         names = names.substr(names.find_first_of(',')+1, names.length());
333                 }
334                         
335                 //get last name
336                 seq2Bin[names] = clusterArray[smallCol].smallChild;
337         }
338         catch(exception& e) {
339                 errorOut(e, "HCluster", "updateMap");
340                 exit(1);
341         }
342 }
343 /***********************************************************************/
344
345
346