5 * Created by Pat Schloss on 8/14/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
10 #include "cluster.hpp"
11 #include "rabundvector.hpp"
12 #include "listvector.hpp"
13 #include "sparsematrix.hpp"
15 /***********************************************************************/
17 Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c) :
18 rabund(rav), list(lv), dMatrix(dm), cutoff(c)
21 cout << "sizeof(MatData): " << sizeof(MatData) << endl;
22 cout << "sizeof(PCell*): " << sizeof(PCell*) << endl;
24 int nCells = dMatrix->getNNodes();
25 time_t start = time(NULL);
27 MatVec matvec = MatVec(nCells);
29 for (MatData currentCell = dMatrix->begin(); currentCell != dMatrix->end(); currentCell++) {
30 matvec[i++] = currentCell;
32 for (i= matvec.size();i>0;i--) {
33 dMatrix->rmCell(matvec[i-1]);
35 MatData it = dMatrix->begin();
36 while (it != dMatrix->end()) {
37 it = dMatrix->rmCell(it);
39 cout << "Time to remove " << nCells << " cells: " << time(NULL) - start << " seconds" << endl;
41 MatData it = dMatrix->begin();
42 cout << it->row << "/" << it->column << "/" << it->dist << endl;
43 dMatrix->rmCell(dMatrix->begin());
44 cout << it->row << "/" << it->column << "/" << it->dist << endl;
48 // Create a data structure to quickly access the PCell information
49 // for a certain sequence. It consists of a vector of lists, where
50 // a list contains pointers (iterators) to the all distances related
51 // to a certain sequence. The Vector is accessed via the index of a
52 // sequence in the distance matrix.
53 seqVec = vector<MatVec>(lv->size());
54 for (MatData currentCell = dMatrix->begin(); currentCell != dMatrix->end(); currentCell++) {
55 seqVec[currentCell->row].push_back(currentCell);
56 seqVec[currentCell->column].push_back(currentCell);
58 mapWanted = false; //set to true by mgcluster to speed up overlap merge
61 /***********************************************************************/
63 void Cluster::getRowColCells() {
65 PCell* smallCell = dMatrix->getSmallestCell(); //find the smallest cell - this routine should probably not be in the SpMat class
67 smallRow = smallCell->row; // get its row
68 smallCol = smallCell->column; // get its column
69 smallDist = smallCell->dist; // get the smallest distance
71 rowCells = seqVec[smallRow]; // all distances related to the row index
72 colCells = seqVec[smallCol]; // all distances related to the column index
73 nRowCells = rowCells.size();
74 nColCells = colCells.size();
77 errorOut(e, "Cluster", "getRowColCells");
82 /***********************************************************************/
83 // Remove the specified cell from the seqVec and from the sparse
85 void Cluster::removeCell(const MatData& cell, int vrow, int vcol, bool rmMatrix)
88 ull dcol = cell->column;
89 if (((vrow >=0) && (drow != smallRow)) ||
90 ((vcol >=0) && (dcol != smallCol))) {
100 nCells = seqVec[drow].size();
101 for (vrow=0; vrow<nCells;vrow++) {
102 crow = seqVec[drow][vrow]->row;
103 ccol = seqVec[drow][vrow]->column;
104 if (((crow == drow) && (ccol == dcol)) ||
105 ((ccol == drow) && (crow == dcol))) {
110 seqVec[drow].erase(seqVec[drow].begin()+vrow);
112 nCells = seqVec[dcol].size();
113 for (vcol=0; vcol<nCells;vcol++) {
114 crow = seqVec[dcol][vcol]->row;
115 ccol = seqVec[dcol][vcol]->column;
116 if (((crow == drow) && (ccol == dcol)) ||
117 ((ccol == drow) && (crow == dcol))) {
122 seqVec[dcol].erase(seqVec[dcol].begin()+vcol);
124 dMatrix->rmCell(cell);
129 /***********************************************************************/
131 void Cluster::clusterBins(){
133 // cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol);
135 rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol));
136 rabund->set(smallRow, 0);
137 rabund->setLabel(toString(smallDist));
139 // cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl;
141 catch(exception& e) {
142 errorOut(e, "Cluster", "clusterBins");
149 /***********************************************************************/
151 void Cluster::clusterNames(){
153 // cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(smallRow) << '\t' << list->get(smallCol);
154 if (mapWanted) { updateMap(); }
156 list->set(smallCol, list->get(smallRow)+','+list->get(smallCol));
157 list->set(smallRow, "");
158 list->setLabel(toString(smallDist));
160 // cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl;
162 catch(exception& e) {
163 errorOut(e, "Cluster", "clusterNames");
169 /***********************************************************************/
170 //This function clusters based on the method of the derived class
171 //At the moment only average and complete linkage are covered, because
172 //single linkage uses a different approach.
173 void Cluster::update(){
177 vector<int> found(nColCells, 0);
181 // The vector has to be traversed in reverse order to preserve the index
182 // for faster removal in removeCell()
183 for (int i=nRowCells-1;i>=0;i--) {
184 if (!((rowCells[i]->row == smallRow) && (rowCells[i]->column == smallCol))) {
185 if (rowCells[i]->row == smallRow) {
186 search = rowCells[i]->column;
188 search = rowCells[i]->row;
191 for (int j=0;j<nColCells;j++) {
192 if (!((colCells[j]->row == smallRow) && (colCells[j]->column == smallCol))) {
193 if (colCells[j]->row == search || colCells[j]->column == search) {
195 changed = updateDistance(colCells[j], rowCells[i]);
196 // If the cell's distance changed and it had the same distance as
197 // the smallest distance, invalidate the mins vector in SparseMatrix
199 if (colCells[j]->vectorMap != NULL) {
200 *(colCells[j]->vectorMap) = NULL;
201 colCells[j]->vectorMap = NULL;
208 removeCell(rowCells[i], i , -1);
214 // Special handling for singlelinkage case, not sure whether this
216 for (int i=nColCells-1;i>=0;i--) {
218 removeCell(colCells[i], -1, i);
219 cout << "smallRow = " << smallRow+1 << " smallCol = " << smallCol+1 << endl;
220 if (!((colCells[i]->row == smallRow) && (colCells[i]->column == smallCol))) {
221 mothurOut("Warning: merging " + toString(colCells[i]->row+1) + " " + toString(colCells[i]->column+1) + " distance " + toString(colCells[i]->dist) + " value above cutoff. Results will differ from those if cutoff was used in the read.dist command."); mothurOutEndLine();
226 catch(exception& e) {
227 errorOut(e, "Cluster", "update");
231 /***********************************************************************/
232 void Cluster::setMapWanted(bool m) {
237 for (int i = 0; i < list->getNumBins(); i++) {
240 string names = list->get(i);
241 while (names.find_first_of(',') != -1) {
243 string name = names.substr(0,names.find_first_of(','));
244 //save name and bin number
246 names = names.substr(names.find_first_of(',')+1, names.length());
254 catch(exception& e) {
255 errorOut(e, "Cluster", "setMapWanted");
259 /***********************************************************************/
260 void Cluster::updateMap() {
262 //update location of seqs in smallRow since they move to smallCol now
263 string names = list->get(smallRow);
264 while (names.find_first_of(',') != -1) {
266 string name = names.substr(0,names.find_first_of(','));
267 //save name and bin number
268 seq2Bin[name] = smallCol;
269 names = names.substr(names.find_first_of(',')+1, names.length());
273 seq2Bin[names] = smallCol;
276 catch(exception& e) {
277 errorOut(e, "Cluster", "updateMap");
281 /***********************************************************************/