5 * Created by Pat Schloss on 8/14/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
10 #include "cluster.hpp"
11 #include "rabundvector.hpp"
12 #include "listvector.hpp"
13 #include "sparsematrix.hpp"
15 /***********************************************************************/
17 Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c, string f) :
18 rabund(rav), list(lv), dMatrix(dm), method(f)
22 cout << "sizeof(MatData): " << sizeof(MatData) << endl;
23 cout << "sizeof(PCell*): " << sizeof(PCell*) << endl;
25 int nCells = dMatrix->getNNodes();
26 time_t start = time(NULL);
28 MatVec matvec = MatVec(nCells);
30 for (MatData currentCell = dMatrix->begin(); currentCell != dMatrix->end(); currentCell++) {
31 matvec[i++] = currentCell;
33 for (i= matvec.size();i>0;i--) {
34 dMatrix->rmCell(matvec[i-1]);
36 MatData it = dMatrix->begin();
37 while (it != dMatrix->end()) {
38 it = dMatrix->rmCell(it);
40 cout << "Time to remove " << nCells << " cells: " << time(NULL) - start << " seconds" << endl;
42 MatData it = dMatrix->begin();
43 cout << it->row << "/" << it->column << "/" << it->dist << endl;
44 dMatrix->rmCell(dMatrix->begin());
45 cout << it->row << "/" << it->column << "/" << it->dist << endl;
49 // Create a data structure to quickly access the PCell information
50 // for a certain sequence. It consists of a vector of lists, where
51 // a list contains pointers (iterators) to the all distances related
52 // to a certain sequence. The Vector is accessed via the index of a
53 // sequence in the distance matrix.
55 //string temp = "temp";
56 //m->openOutputFile(temp, outtemp);
57 //cout << lv->size() << endl;
58 seqVec = vector<MatVec>(lv->size());
59 for (MatData currentCell = dMatrix->begin(); currentCell != dMatrix->end(); currentCell++) {
60 //outtemp << currentCell->row << '\t' << currentCell->column << '\t' << currentCell->dist << endl;
61 seqVec[currentCell->row].push_back(currentCell);
62 seqVec[currentCell->column].push_back(currentCell);
65 mapWanted = false; //set to true by mgcluster to speed up overlap merge
67 //save so you can modify as it changes in average neighbor
69 m = MothurOut::getInstance();
73 m->errorOut(e, "Cluster", "Cluster");
78 /***********************************************************************/
80 void Cluster::getRowColCells() {
82 PCell* smallCell = dMatrix->getSmallestCell(); //find the smallest cell - this routine should probably not be in the SpMat class
84 smallRow = smallCell->row; // get its row
85 smallCol = smallCell->column; // get its column
86 smallDist = smallCell->dist; // get the smallest distance
87 //cout << "small row = " << smallRow << "small col = " << smallCol << "small dist = " << smallDist << endl;
89 rowCells = seqVec[smallRow]; // all distances related to the row index
90 colCells = seqVec[smallCol]; // all distances related to the column index
91 nRowCells = rowCells.size();
92 nColCells = colCells.size();
93 //cout << "num rows = " << nRowCells << "num col = " << nColCells << endl;
95 //for (int i = 0; i < nColCells; i++) { cout << colCells[i]->row << '\t' << colCells[i]->column << endl; }
96 //for (int i = 0; i < nRowCells; i++) { cout << rowCells[i]->row << '\t' << rowCells[i]->column << endl; }
99 m->errorOut(e, "Cluster", "getRowColCells");
104 /***********************************************************************/
105 // Remove the specified cell from the seqVec and from the sparse
107 void Cluster::removeCell(const MatData& cell, int vrow, int vcol, bool rmMatrix){
110 ull drow = cell->row;
111 ull dcol = cell->column;
112 if (((vrow >=0) && (drow != smallRow)) ||
113 ((vcol >=0) && (dcol != smallCol))) {
123 nCells = seqVec[drow].size();
124 for (vrow=0; vrow<nCells;vrow++) {
125 crow = seqVec[drow][vrow]->row;
126 ccol = seqVec[drow][vrow]->column;
127 if (((crow == drow) && (ccol == dcol)) ||
128 ((ccol == drow) && (crow == dcol))) {
134 seqVec[drow].erase(seqVec[drow].begin()+vrow);
136 nCells = seqVec[dcol].size();
137 for (vcol=0; vcol<nCells;vcol++) {
138 crow = seqVec[dcol][vcol]->row;
139 ccol = seqVec[dcol][vcol]->column;
140 if (((crow == drow) && (ccol == dcol)) ||
141 ((ccol == drow) && (crow == dcol))) {
147 seqVec[dcol].erase(seqVec[dcol].begin()+vcol);
150 //cout << " removing = " << cell->row << '\t' << cell->column << '\t' << cell->dist << endl;
151 dMatrix->rmCell(cell);
152 // cout << "done" << endl;
156 catch(exception& e) {
157 m->errorOut(e, "Cluster", "removeCell");
161 /***********************************************************************/
163 void Cluster::clusterBins(){
165 // cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol);
167 rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol));
168 rabund->set(smallRow, 0);
169 rabund->setLabel(toString(smallDist));
171 // cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl;
173 catch(exception& e) {
174 m->errorOut(e, "Cluster", "clusterBins");
181 /***********************************************************************/
183 void Cluster::clusterNames(){
185 // cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(smallRow) << '\t' << list->get(smallCol);
186 if (mapWanted) { updateMap(); }
188 list->set(smallCol, list->get(smallRow)+','+list->get(smallCol));
189 list->set(smallRow, "");
190 list->setLabel(toString(smallDist));
192 // cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl;
194 catch(exception& e) {
195 m->errorOut(e, "Cluster", "clusterNames");
201 /***********************************************************************/
202 //This function clusters based on the method of the derived class
203 //At the moment only average and complete linkage are covered, because
204 //single linkage uses a different approach.
205 void Cluster::update(double& cutOFF){
209 vector<int> foundCol(nColCells, 0);
214 // The vector has to be traversed in reverse order to preserve the index
215 // for faster removal in removeCell()
216 for (int i=nRowCells-1;i>=0;i--) {
217 //if you are not the smallCell
218 if (!((rowCells[i]->row == smallRow) && (rowCells[i]->column == smallCol))) {
219 if (rowCells[i]->row == smallRow) {
220 search = rowCells[i]->column;
222 search = rowCells[i]->row;
226 for (int j=0;j<nColCells;j++) {
227 if (!((colCells[j]->row == smallRow) && (colCells[j]->column == smallCol))) { //if you are not hte smallest distance
228 if (colCells[j]->row == search || colCells[j]->column == search) {
231 changed = updateDistance(colCells[j], rowCells[i]);
232 // If the cell's distance changed and it had the same distance as
233 // the smallest distance, invalidate the mins vector in SparseMatrix
235 if (colCells[j]->vectorMap != NULL) {
236 *(colCells[j]->vectorMap) = NULL;
237 colCells[j]->vectorMap = NULL;
244 //if not merged it you need it for warning
245 if ((!merged) && (method == "average" || method == "weighted")) {
246 //m->mothurOut("Warning: trying to merge cell " + toString(rowCells[i]->row+1) + " " + toString(rowCells[i]->column+1) + " distance " + toString(rowCells[i]->dist) + " with value above cutoff. Results may vary from using cutoff at cluster command instead of read.dist."); m->mothurOutEndLine();
247 if (cutOFF > rowCells[i]->dist) {
248 cutOFF = rowCells[i]->dist;
249 //m->mothurOut("changing cutoff to " + toString(cutOFF)); m->mothurOutEndLine();
253 removeCell(rowCells[i], i , -1);
260 // Special handling for singlelinkage case, not sure whether this
262 for (int i=nColCells-1;i>=0;i--) {
263 if (foundCol[i] == 0) {
264 if (method == "average" || method == "weighted") {
265 if (!((colCells[i]->row == smallRow) && (colCells[i]->column == smallCol))) {
266 //m->mothurOut("Warning: merging cell " + toString(colCells[i]->row+1) + " " + toString(colCells[i]->column+1) + " distance " + toString(colCells[i]->dist) + " value above cutoff. Results may vary from using cutoff at cluster command instead of read.dist."); m->mothurOutEndLine();
267 if (cutOFF > colCells[i]->dist) {
268 cutOFF = colCells[i]->dist;
269 //m->mothurOut("changing cutoff to " + toString(cutOFF)); m->mothurOutEndLine();
273 removeCell(colCells[i], -1, i);
277 catch(exception& e) {
278 m->errorOut(e, "Cluster", "update");
282 /***********************************************************************/
283 void Cluster::setMapWanted(bool f) {
288 for (int i = 0; i < list->getNumBins(); i++) {
291 string names = list->get(i);
292 while (names.find_first_of(',') != -1) {
294 string name = names.substr(0,names.find_first_of(','));
295 //save name and bin number
297 names = names.substr(names.find_first_of(',')+1, names.length());
305 catch(exception& e) {
306 m->errorOut(e, "Cluster", "setMapWanted");
310 /***********************************************************************/
311 void Cluster::updateMap() {
313 //update location of seqs in smallRow since they move to smallCol now
314 string names = list->get(smallRow);
315 while (names.find_first_of(',') != -1) {
317 string name = names.substr(0,names.find_first_of(','));
318 //save name and bin number
319 seq2Bin[name] = smallCol;
320 names = names.substr(names.find_first_of(',')+1, names.length());
324 seq2Bin[names] = smallCol;
327 catch(exception& e) {
328 m->errorOut(e, "Cluster", "updateMap");
332 /***********************************************************************/