#include "cluster.hpp"
#include "rabundvector.hpp"
#include "listvector.hpp"
-#include "sparsematrix.hpp"
/***********************************************************************/
-Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseMatrix* dm) :
-rabund(rav), list(lv), dMatrix(dm)
+Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string f) :
+rabund(rav), list(lv), dMatrix(dm), method(f) /*
+ * Constructor. The pointers rav, lv and dm are stored as-is in rabund, list and dMatrix,
+ * and f is copied into method (update() compares method with "average" and "weighted").
+ * The body switches the seq2Bin bookkeeping off (mapWanted = false), keeps c as the
+ * working cutoff and fetches the MothurOut instance that the catch handlers report through.
+ * Nothing visible here takes ownership of the three pointers.
+ */
{
-}
-
-/***********************************************************************/
-
-void Cluster::getRowColCells(){
try {
- PCell* smallCell = dMatrix->getSmallestCell(); //find the smallest cell - this routine should probably not be in the SpMat class
-
- smallRow = smallCell->row; //get its row
- smallCol = smallCell->column; //get its column
- smallDist = smallCell->dist; //get the smallest distance
-
- rowCells.clear();
- colCells.clear();
-
- for(MatData currentCell=dMatrix->begin();currentCell!=dMatrix->end();currentCell++){
-
- if(&*currentCell == smallCell){ //put the smallest cell first
- rowCells.insert(rowCells.begin(), currentCell);
- colCells.insert(colCells.begin(), currentCell);
- }
- else if(currentCell->row == smallRow){
- rowCells.push_back(currentCell);
- }
- else if(currentCell->column == smallRow){
- rowCells.push_back(currentCell);
- }
- else if(currentCell->row == smallCol){
- colCells.push_back(currentCell);
- }
- else if(currentCell->column == smallCol){
- colCells.push_back(currentCell);
- }
- }
-
- nRowCells = rowCells.size();
- nColCells = colCells.size();
+
+ mapWanted = false; //set to true by mgcluster to speed up overlap merge
+
+ //save so you can modify as it changes in average neighbor
+ cutoff = c;
+ m = MothurOut::getInstance(); // NOTE(review): assigned last in the try body although the handler below dereferences m; the two statements before it are plain assignments
}
catch(exception& e) {
- errorOut(e, "Cluster", "getRowColCells");
+ m->errorOut(e, "Cluster", "Cluster"); // report under class "Cluster", function "Cluster"; the unchanged next line exits with status 1
exit(1);
}
}
-
/***********************************************************************/
-
void Cluster::clusterBins(){
try {
-
- rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol));
+ rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol)); /* clusterBins merges two
+ * abundance bins: this line stores the sum of bins smallRow and smallCol in bin smallCol; the
+ * unchanged lines that follow set bin smallRow to 0 and label the vector with toString(smallDist).
+ * smallRow, smallCol and smallDist are members that must already be set when this is called. */
rabund->set(smallRow, 0);
rabund->setLabel(toString(smallDist));
-
}
catch(exception& e) {
- errorOut(e, "Cluster", "clusterBins");
+ m->errorOut(e, "Cluster", "clusterBins"); // report through the MothurOut handle m; the unchanged next line exits with status 1
exit(1);
}
}
-
/***********************************************************************/
void Cluster::clusterNames(){
try {
-
+ if (mapWanted) { updateMap(); } /* clusterNames merges two name bins. updateMap() reads
+ * list->get(smallRow), so it has to run before the unchanged lines below overwrite that bin:
+ * they store "<smallRow names>,<smallCol names>" in bin smallCol, set bin smallRow to "" and
+ * label the list with toString(smallDist). */
+
list->set(smallCol, list->get(smallRow)+','+list->get(smallCol));
list->set(smallRow, "");
list->setLabel(toString(smallDist));
-
}
catch(exception& e) {
- errorOut(e, "Cluster", "clusterNames");
+ m->errorOut(e, "Cluster", "clusterNames"); // report through the MothurOut handle m; the unchanged next line exits with status 1
exit(1);
}
}
-
/***********************************************************************/
+void Cluster::update(double& cutOFF){ /*
+ * Performs one merge step on the pair returned by dMatrix->getSmallestCell().
+ *
+ * Pass 1 walks the cells of smallRow (skipping the one that points at smallCol). Each is
+ * matched against the cells of smallCol by index; a match is combined through
+ * updateDistance() followed by updateCellCompliment(smallCol, j). The smallRow cell is
+ * then removed whether or not it was matched.
+ * clusterBins() and clusterNames() then fold bin smallRow into bin smallCol.
+ * Pass 2 removes every smallCol cell that was not matched in pass 1.
+ *
+ * cutOFF (in/out): when method is "average" or "weighted", it is lowered to the smallest
+ * distance among the unmatched cells that get discarded; it is never raised here.
+ *
+ * On an exception the error is reported through m->errorOut and the process exits.
+ */
+ try {
+ smallCol = dMatrix->getSmallestCell(smallRow); // smallRow is read right after this call, so it is presumably filled in through the argument -- confirm against SparseDistanceMatrix
+ nColCells = dMatrix->seqVec[smallCol].size();
+ nRowCells = dMatrix->seqVec[smallRow].size();
+ // foundCol[j] becomes 1 once the j-th cell of smallCol has been paired with a cell of smallRow
+ vector<int> foundCol(nColCells, 0);
+ //cout << dMatrix->getNNodes() << " small cell: " << smallRow << '\t' << smallCol << endl;
+ int search;
+ bool changed; // receives the result of updateDistance() below; never read in this function
+ // walk from the last cell to the first (presumably so rmCell(smallRow, i) cannot disturb positions still to be visited -- confirm)
+ for (int i=nRowCells-1;i>=0;i--) {
+ if (m->control_pressed) { break; } // only this loop is abandoned; the merge and the column cleanup below still run
+
+ //if you are not the smallCell
+ if (dMatrix->seqVec[smallRow][i].index != smallCol) {
+ search = dMatrix->seqVec[smallRow][i].index;
+ // look for the cell of smallCol that refers to the same index as this smallRow cell
+ bool merged = false;
+ for (int j=0;j<nColCells;j++) {
+
+ if (dMatrix->seqVec[smallCol][j].index != smallRow) { //if you are not the smallest distance
+ if (dMatrix->seqVec[smallCol][j].index == search) {
+ foundCol[j] = 1;
+ merged = true;
+ changed = updateDistance(dMatrix->seqVec[smallCol][j], dMatrix->seqVec[smallRow][i]);
+ dMatrix->updateCellCompliment(smallCol, j);
+ break;
+ }else if (dMatrix->seqVec[smallCol][j].index < search) { j+=nColCells; } //we don't have a distance for this cell; pushing j past nColCells ends the scan (presumably relies on the cells being ordered by descending index -- confirm)
+ }
+ }
+ //an unmatched distance is discarded below; for average/weighted the smallest such distance is kept in cutOFF (the original note says it is needed for a warning)
+ if ((!merged) && (method == "average" || method == "weighted")) {
+ if (cutOFF > dMatrix->seqVec[smallRow][i].dist) {
+ cutOFF = dMatrix->seqVec[smallRow][i].dist;
+ //cout << "changing cutoff to " << cutOFF << endl;
+ }
+
+ }
+ dMatrix->rmCell(smallRow, i); // removed whether or not it was matched
+ }
+ }
+ clusterBins();
+ clusterNames();
+ // pass 2: drop every smallCol cell left unmatched above; the cell pointing at smallRow is among them because the scan in pass 1 skips it
+ // Special handling for singlelinkage case, not sure whether this
+ // could be avoided
+ for (int i=nColCells-1;i>=0;i--) {
+ if (foundCol[i] == 0) {
+ if (method == "average" || method == "weighted") {
+ if (dMatrix->seqVec[smallCol][i].index != smallRow) { //if you are not the smallest distance
+ if (cutOFF > dMatrix->seqVec[smallCol][i].dist) {
+ cutOFF = dMatrix->seqVec[smallCol][i].dist;
+ }
+ }
+ }
+ dMatrix->rmCell(smallCol, i);
+ }
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Cluster", "update");
+ exit(1);
+ }
+}
+/***********************************************************************/
+void Cluster::setMapWanted(bool f) { /*
+ * Stores f in mapWanted and then fills seq2Bin from the current list: every bin k is read
+ * as a comma-separated string of names and each name is mapped to k.
+ * The map is filled even when f is false, and it is not cleared first, so entries for
+ * names that are no longer in any bin stay as they were.
+ * On an exception the error is reported through m->errorOut and the process exits.
+ */
+ try {
+ mapWanted = f;
+
+ //initialize map
+ for (int k = 0; k < list->getNumBins(); k++) {
+
+ string names = list->get(k);
+
+ //parse bin
+ string individual = ""; // name collected so far, up to the next comma
+ int binNameslength = names.size();
+ for(int j=0;j<binNameslength;j++){
+ if(names[j] == ','){
+ seq2Bin[individual] = k;
+ individual = "";
+ }
+ else{ individual += names[j]; }
+ }
+ //get last name
+ seq2Bin[individual] = k; // runs for every bin, so an empty bin string adds the key "" mapped to k
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Cluster", "setMapWanted");
+ exit(1);
+ }
+}
+/***********************************************************************/
+void Cluster::updateMap() { /*
+ * Re-points seq2Bin for every name currently stored in bin smallRow so that it maps to
+ * smallCol. It reads list->get(smallRow), so it must be called while that bin still holds
+ * its names; clusterNames() calls it before emptying the bin.
+ * On an exception the error is reported through m->errorOut and the process exits.
+ */
+ try {
+ //update location of seqs in smallRow since they move to smallCol now
+ string names = list->get(smallRow);
+ // split the comma-separated bin string one character at a time
+ string individual = "";
+ int binNameslength = names.size();
+ for(int j=0;j<binNameslength;j++){
+ if(names[j] == ','){
+ seq2Bin[individual] = smallCol;
+ individual = "";
+ }
+ else{ individual += names[j]; }
+ }
+ //get last name
+ seq2Bin[individual] = smallCol; // the text after the final comma, or the whole string when there is no comma
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Cluster", "updateMap");
+ exit(1);
+ }
+}
+/***********************************************************************/
+
+