]> git.donarmstrong.com Git - mothur.git/blob - cluster.cpp
6fd463116e65becc0ef2a969c08d5bfaf6dfea15
[mothur.git] / cluster.cpp
1 /*
2  *  cluster.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 8/14/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "cluster.hpp"
11 #include "rabundvector.hpp"
12 #include "listvector.hpp"
13 #include "sparsematrix.hpp"
14
15 /***********************************************************************/
16
17 Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseMatrix* dm) :
18 rabund(rav), list(lv), dMatrix(dm)
19 {
20 /*
21         cout << "sizeof(MatData): " << sizeof(MatData) << endl;
22         cout << "sizeof(PCell*): " << sizeof(PCell*) << endl;
23
24         int nCells = dMatrix->getNNodes();
25         time_t start = time(NULL);
26
27         MatVec matvec = MatVec(nCells); 
28         int i = 0;
29         for (MatData currentCell = dMatrix->begin(); currentCell != dMatrix->end(); currentCell++) {
30                 matvec[i++] = currentCell;
31         }
32         for (i= matvec.size();i>0;i--) {
33                 dMatrix->rmCell(matvec[i-1]);
34         }
35         MatData it = dMatrix->begin(); 
36         while (it != dMatrix->end()) { 
37                 it = dMatrix->rmCell(it);
38         }
39         cout << "Time to remove " << nCells << " cells: " << time(NULL) - start << " seconds" << endl;
40     exit(0);
41         MatData it = dMatrix->begin();
42         cout << it->row << "/" << it->column << "/" << it->dist << endl;
43         dMatrix->rmCell(dMatrix->begin());
44         cout << it->row << "/" << it->column << "/" << it->dist << endl;
45         exit(0);
46 */
47
48         // Create a data structure to quickly access the PCell information
49         // for a certain sequence. It consists of a vector of lists, where 
50         // a list contains pointers (iterators) to the all distances related
51         // to a certain sequence. The Vector is accessed via the index of a 
52         // sequence in the distance matrix.
53         seqVec = vector<MatVec>(lv->size());
54         for (MatData currentCell = dMatrix->begin(); currentCell != dMatrix->end(); currentCell++) {
55                 seqVec[currentCell->row].push_back(currentCell);
56                 seqVec[currentCell->column].push_back(currentCell);
57         }
58 }
59
60 /***********************************************************************/
61
62 void Cluster::getRowColCells() {
63         try {
64                 PCell* smallCell = dMatrix->getSmallestCell();  //find the smallest cell - this routine should probably not be in the SpMat class
65         
66                 smallRow = smallCell->row;              // get its row
67                 smallCol = smallCell->column;   // get its column
68                 smallDist = smallCell->dist;    // get the smallest distance
69         
70                 rowCells = seqVec[smallRow];    // all distances related to the row index
71                 colCells = seqVec[smallCol];    // all distances related to the column index
72                 nRowCells = rowCells.size();
73                 nColCells = colCells.size();
74         }
75         catch(exception& e) {
76                 errorOut(e, "Cluster", "getRowColCells");
77                 exit(1);
78         }
79
80 }
81
82 // Remove the specified cell from the seqVec and from the sparse
83 // matrix
84 void Cluster::removeCell(const MatData& cell, int vrow, int vcol, bool rmMatrix)
85 {
86         ull drow = cell->row;
87         ull dcol = cell->column;
88         if (((vrow >=0) && (drow != smallRow)) ||
89                 ((vcol >=0) && (dcol != smallCol))) {
90                 ull dtemp = drow;
91                 drow = dcol;
92                 dcol = dtemp;
93         }
94
95         ull crow;
96         ull ccol;
97         int nCells;
98         if (vrow < 0) {
99                 nCells = seqVec[drow].size();
100                 for (vrow=0; vrow<nCells;vrow++) {
101                         crow = seqVec[drow][vrow]->row;
102                         ccol = seqVec[drow][vrow]->column;
103                         if (((crow == drow) && (ccol == dcol)) ||
104                                 ((ccol == drow) && (crow == dcol))) {
105                                 break;
106                         }
107                 }
108         }
109         seqVec[drow].erase(seqVec[drow].begin()+vrow);
110         if (vcol < 0) {
111                 nCells = seqVec[dcol].size();
112                 for (vcol=0; vcol<nCells;vcol++) {
113                         crow = seqVec[dcol][vcol]->row;
114                         ccol = seqVec[dcol][vcol]->column;
115                         if (((crow == drow) && (ccol == dcol)) ||
116                                 ((ccol == drow) && (crow == dcol))) {
117                                 break;
118                         }
119                 }
120         }
121         seqVec[dcol].erase(seqVec[dcol].begin()+vcol);
122         if (rmMatrix) {
123                 dMatrix->rmCell(cell);
124         }
125 }
126
127
128 /***********************************************************************/
129
130 void Cluster::clusterBins(){
131         try {
132         //      cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol);
133
134                 rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol));     
135                 rabund->set(smallRow, 0);       
136                 rabund->setLabel(toString(smallDist));
137
138         //      cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl;
139         }
140         catch(exception& e) {
141                 errorOut(e, "Cluster", "clusterBins");
142                 exit(1);
143         }
144
145
146 }
147
148 /***********************************************************************/
149
150 void Cluster::clusterNames(){
151         try {
152         //      cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(smallRow) << '\t' << list->get(smallCol);
153
154                 list->set(smallCol, list->get(smallRow)+','+list->get(smallCol));
155                 list->set(smallRow, "");        
156                 list->setLabel(toString(smallDist));
157         
158         //      cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl;
159     }
160         catch(exception& e) {
161                 errorOut(e, "Cluster", "clusterNames");
162                 exit(1);
163         }
164
165 }
166
167 /***********************************************************************/
168 //This function clusters based on the method of the derived class
169 //At the moment only average and complete linkage are covered, because
170 //single linkage uses a different approach.
171 void Cluster::update(){
172         try {
173                 getRowColCells();       
174         
175                 vector<int> found(nColCells, 0);
176                 int search;
177                 bool changed;
178
179                 // The vector has to be traversed in reverse order to preserve the index
180                 // for faster removal in removeCell()
181                 for (int i=nRowCells-1;i>=0;i--) {
182                         if (!((rowCells[i]->row == smallRow) && (rowCells[i]->column == smallCol))) {
183                                 if (rowCells[i]->row == smallRow) {
184                                         search = rowCells[i]->column;
185                                 } else {
186                                         search = rowCells[i]->row;
187                                 }
188                 
189                                 for (int j=0;j<nColCells;j++) {
190                                         if (!((colCells[j]->row == smallRow) && (colCells[j]->column == smallCol))) {
191                                                 if (colCells[j]->row == search || colCells[j]->column == search) {
192                                                         found[j] = 1;
193                                                         changed = updateDistance(colCells[j], rowCells[i]);
194                                                         // If the cell's distance changed and it had the same distance as 
195                                                         // the smallest distance, invalidate the mins vector in SparseMatrix
196                                                         if (changed) {
197                                                                 if (colCells[j]->vectorMap != NULL) {
198                                                                         *(colCells[j]->vectorMap) = NULL;
199                                                                         colCells[j]->vectorMap = NULL;
200                                                                 }
201                                                         }
202                                                         break;
203                                                 }
204                                         }
205                                 }
206                                 removeCell(rowCells[i], i , -1);
207                         }
208                 }
209                 clusterBins();
210                 clusterNames();
211
212                 // Special handling for singlelinkage case, not sure whether this
213                 // could be avoided
214                 for (int i=nColCells-1;i>=0;i--) {
215                         if (found[i] == 0) {
216                                 removeCell(colCells[i], -1, i);
217                         }
218                 }
219         }
220         catch(exception& e) {
221                 errorOut(e, "Cluster", "update");
222                 exit(1);
223         }
224 }
225
226
227 /***********************************************************************/
228