#include "cluster.hpp"
#include "rabundvector.hpp"
#include "listvector.hpp"
-#include "sparsematrix.hpp"
/***********************************************************************/
-Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c, string f) :
+Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string f) :
rabund(rav), list(lv), dMatrix(dm), method(f)
{
-/*
- cout << "sizeof(MatData): " << sizeof(MatData) << endl;
- cout << "sizeof(PCell*): " << sizeof(PCell*) << endl;
-
- int nCells = dMatrix->getNNodes();
- time_t start = time(NULL);
-
- MatVec matvec = MatVec(nCells);
- int i = 0;
- for (MatData currentCell = dMatrix->begin(); currentCell != dMatrix->end(); currentCell++) {
- matvec[i++] = currentCell;
- }
- for (i= matvec.size();i>0;i--) {
- dMatrix->rmCell(matvec[i-1]);
- }
- MatData it = dMatrix->begin();
- while (it != dMatrix->end()) {
- it = dMatrix->rmCell(it);
- }
- cout << "Time to remove " << nCells << " cells: " << time(NULL) - start << " seconds" << endl;
- exit(0);
- MatData it = dMatrix->begin();
- cout << it->row << "/" << it->column << "/" << it->dist << endl;
- dMatrix->rmCell(dMatrix->begin());
- cout << it->row << "/" << it->column << "/" << it->dist << endl;
- exit(0);
-*/
-
- // Create a data structure to quickly access the PCell information
- // for a certain sequence. It consists of a vector of lists, where
- // a list contains pointers (iterators) to the all distances related
- // to a certain sequence. The Vector is accessed via the index of a
- // sequence in the distance matrix.
-
-
- seqVec = vector<MatVec>(lv->size());
- for (MatData currentCell = dMatrix->begin(); currentCell != dMatrix->end(); currentCell++) {
- seqVec[currentCell->row].push_back(currentCell);
- seqVec[currentCell->column].push_back(currentCell);
- }
-
- mapWanted = false; //set to true by mgcluster to speed up overlap merge
-
- //save so you can modify as it changes in average neighbor
- cutoff = c;
- m = MothurOut::getInstance();
-}
-
-/***********************************************************************/
-
-void Cluster::getRowColCells() {
try {
- PCell* smallCell = dMatrix->getSmallestCell(); //find the smallest cell - this routine should probably not be in the SpMat class
-
- smallRow = smallCell->row; // get its row
- smallCol = smallCell->column; // get its column
- smallDist = smallCell->dist; // get the smallest distance
-
- rowCells = seqVec[smallRow]; // all distances related to the row index
- colCells = seqVec[smallCol]; // all distances related to the column index
- nRowCells = rowCells.size();
- nColCells = colCells.size();
+
+ mapWanted = false; //set to true by mgcluster to speed up overlap merge
+
+ //save so you can modify as it changes in average neighbor
+ cutoff = c;
+ m = MothurOut::getInstance();
}
catch(exception& e) {
- m->errorOut(e, "Cluster", "getRowColCells");
+ m->errorOut(e, "Cluster", "Cluster");
exit(1);
}
-
}
/***********************************************************************/
-// Remove the specified cell from the seqVec and from the sparse
-// matrix
-void Cluster::removeCell(const MatData& cell, int vrow, int vcol, bool rmMatrix){
- try {
-
- ull drow = cell->row;
- ull dcol = cell->column;
- if (((vrow >=0) && (drow != smallRow)) ||
- ((vcol >=0) && (dcol != smallCol))) {
- ull dtemp = drow;
- drow = dcol;
- dcol = dtemp;
- }
-
- ull crow;
- ull ccol;
- int nCells;
- if (vrow < 0) {
- nCells = seqVec[drow].size();
- for (vrow=0; vrow<nCells;vrow++) {
- crow = seqVec[drow][vrow]->row;
- ccol = seqVec[drow][vrow]->column;
- if (((crow == drow) && (ccol == dcol)) ||
- ((ccol == drow) && (crow == dcol))) {
- break;
- }
- }
- }
-
- seqVec[drow].erase(seqVec[drow].begin()+vrow);
- if (vcol < 0) {
- nCells = seqVec[dcol].size();
- for (vcol=0; vcol<nCells;vcol++) {
- crow = seqVec[dcol][vcol]->row;
- ccol = seqVec[dcol][vcol]->column;
- if (((crow == drow) && (ccol == dcol)) ||
- ((ccol == drow) && (crow == dcol))) {
- break;
- }
- }
- }
-
- seqVec[dcol].erase(seqVec[dcol].begin()+vcol);
-
- if (rmMatrix) {
- dMatrix->rmCell(cell);
- }
-
- }
- catch(exception& e) {
- m->errorOut(e, "Cluster", "removeCell");
- exit(1);
- }
-}
-/***********************************************************************/
-
void Cluster::clusterBins(){
try {
- // cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol);
-
- rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol));
+ rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol));
rabund->set(smallRow, 0);
rabund->setLabel(toString(smallDist));
-
- // cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl;
}
catch(exception& e) {
m->errorOut(e, "Cluster", "clusterBins");
exit(1);
}
-
-
}
-
/***********************************************************************/
void Cluster::clusterNames(){
try {
- // cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(smallRow) << '\t' << list->get(smallCol);
if (mapWanted) { updateMap(); }
list->set(smallCol, list->get(smallRow)+','+list->get(smallCol));
list->set(smallRow, "");
list->setLabel(toString(smallDist));
-
- // cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl;
}
catch(exception& e) {
m->errorOut(e, "Cluster", "clusterNames");
exit(1);
}
-
}
-
/***********************************************************************/
-//This function clusters based on the method of the derived class
-//At the moment only average and complete linkage are covered, because
-//single linkage uses a different approach.
void Cluster::update(double& cutOFF){
try {
- getRowColCells();
-
+ smallCol = dMatrix->getSmallestCell(smallRow);
+ nColCells = dMatrix->seqVec[smallCol].size();
+ nRowCells = dMatrix->seqVec[smallRow].size();
+
vector<int> foundCol(nColCells, 0);
-
+ //cout << dMatrix->getNNodes() << " small cell: " << smallRow << '\t' << smallCol << endl;
int search;
bool changed;
-
- // The vector has to be traversed in reverse order to preserve the index
- // for faster removal in removeCell()
+
for (int i=nRowCells-1;i>=0;i--) {
- if (!((rowCells[i]->row == smallRow) && (rowCells[i]->column == smallCol))) {
- if (rowCells[i]->row == smallRow) {
- search = rowCells[i]->column;
- } else {
- search = rowCells[i]->row;
- }
-
+ if (m->control_pressed) { break; }
+
+ //if you are not the smallCell
+ if (dMatrix->seqVec[smallRow][i].index != smallCol) {
+ search = dMatrix->seqVec[smallRow][i].index;
+
bool merged = false;
for (int j=0;j<nColCells;j++) {
- if (!((colCells[j]->row == smallRow) && (colCells[j]->column == smallCol))) { //if you are not hte smallest distance
- if (colCells[j]->row == search || colCells[j]->column == search) {
+
+ if (dMatrix->seqVec[smallCol][j].index != smallRow) { //if you are not the smallest distance
+ if (dMatrix->seqVec[smallCol][j].index == search) {
foundCol[j] = 1;
merged = true;
- changed = updateDistance(colCells[j], rowCells[i]);
- // If the cell's distance changed and it had the same distance as
- // the smallest distance, invalidate the mins vector in SparseMatrix
- if (changed) {
- if (colCells[j]->vectorMap != NULL) {
- *(colCells[j]->vectorMap) = NULL;
- colCells[j]->vectorMap = NULL;
- }
- }
+ changed = updateDistance(dMatrix->seqVec[smallCol][j], dMatrix->seqVec[smallRow][i]);
+ dMatrix->updateCellCompliment(smallCol, j);
break;
- }
- }
+ }else if (dMatrix->seqVec[smallCol][j].index < search) { j+=nColCells; } //we don't have a distance for this cell
+ }
}
//if not merged it you need it for warning
- if ((!merged) && (method == "average")) {
- //m->mothurOut("Warning: trying to merge cell " + toString(rowCells[i]->row+1) + " " + toString(rowCells[i]->column+1) + " distance " + toString(rowCells[i]->dist) + " with value above cutoff. Results may vary from using cutoff at cluster command instead of read.dist."); m->mothurOutEndLine();
- if (cutOFF > rowCells[i]->dist) {
- cutOFF = rowCells[i]->dist;
- //m->mothurOut("changing cutoff to " + toString(cutOFF)); m->mothurOutEndLine();
+ if ((!merged) && (method == "average" || method == "weighted")) {
+ if (cutOFF > dMatrix->seqVec[smallRow][i].dist) {
+ cutOFF = dMatrix->seqVec[smallRow][i].dist;
}
-
+
}
- removeCell(rowCells[i], i , -1);
-
+ dMatrix->rmCell(smallRow, i);
}
}
clusterBins();
clusterNames();
-
+
// Special handling for singlelinkage case, not sure whether this
// could be avoided
for (int i=nColCells-1;i>=0;i--) {
- if (foundCol[i] == 0) {
- if (method == "average") {
- if (!((colCells[i]->row == smallRow) && (colCells[i]->column == smallCol))) {
- //m->mothurOut("Warning: merging cell " + toString(colCells[i]->row+1) + " " + toString(colCells[i]->column+1) + " distance " + toString(colCells[i]->dist) + " value above cutoff. Results may vary from using cutoff at cluster command instead of read.dist."); m->mothurOutEndLine();
- if (cutOFF > colCells[i]->dist) {
- cutOFF = colCells[i]->dist;
- //m->mothurOut("changing cutoff to " + toString(cutOFF)); m->mothurOutEndLine();
+ if (foundCol[i] == 0) {
+ if (method == "average" || method == "weighted") {
+ if (dMatrix->seqVec[smallCol][i].index != smallRow) { //if you are not hte smallest distance
+ if (cutOFF > dMatrix->seqVec[smallCol][i].dist) {
+ cutOFF = dMatrix->seqVec[smallCol][i].dist;
}
}
}
- removeCell(colCells[i], -1, i);
+ dMatrix->rmCell(smallCol, i);
}
}
+
}
catch(exception& e) {
m->errorOut(e, "Cluster", "update");
try {
mapWanted = f;
- //initialize map
- for (int i = 0; i < list->getNumBins(); i++) {
-
- //parse bin
- string names = list->get(i);
- while (names.find_first_of(',') != -1) {
- //get name from bin
- string name = names.substr(0,names.find_first_of(','));
- //save name and bin number
- seq2Bin[name] = i;
- names = names.substr(names.find_first_of(',')+1, names.length());
- }
-
- //get last name
- seq2Bin[names] = i;
+ //initialize map
+ for (int k = 0; k < list->getNumBins(); k++) {
+
+ string names = list->get(k);
+
+ //parse bin
+ string individual = "";
+ int binNameslength = names.size();
+ for(int j=0;j<binNameslength;j++){
+ if(names[j] == ','){
+ seq2Bin[individual] = k;
+ individual = "";
+ }
+ else{ individual += names[j]; }
+ }
+ //get last name
+ seq2Bin[individual] = k;
}
}
}
/***********************************************************************/
void Cluster::updateMap() {
-try {
+ try {
//update location of seqs in smallRow since they move to smallCol now
string names = list->get(smallRow);
- while (names.find_first_of(',') != -1) {
- //get name from bin
- string name = names.substr(0,names.find_first_of(','));
- //save name and bin number
- seq2Bin[name] = smallCol;
- names = names.substr(names.find_first_of(',')+1, names.length());
- }
-
- //get last name
- seq2Bin[names] = smallCol;
+ string individual = "";
+ int binNameslength = names.size();
+ for(int j=0;j<binNameslength;j++){
+ if(names[j] == ','){
+ seq2Bin[individual] = smallCol;
+ individual = "";
+ }
+ else{ individual += names[j]; }
+ }
+ //get last name
+ seq2Bin[individual] = smallCol;
+
}
catch(exception& e) {
m->errorOut(e, "Cluster", "updateMap");