/***********************************************************************/
-Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseMatrix* dm) :
-rabund(rav), list(lv), dMatrix(dm)
+// Constructs a Cluster over caller-supplied data structures.
+//   rav - stored as rabund (updated by clusterBins)
+//   lv  - stored as list (updated by clusterNames)
+//   dm  - stored as dMatrix (read and modified by update)
+//   c   - stored as cutoff
+//   f   - stored as method; update() compares it against "average" and "weighted"
+//   cs  - stored as adjust; update() treats -1.0 as "no adjust value" and otherwise
+//         uses it as the distance for missing cells
+// Only the pointers are stored; nothing visible here copies or frees the pointees.
+// On std::exception the handler reports through m->errorOut and calls exit(1).
+// NOTE(review): m is assigned as the last statement of the try body, so the handler
+// would use an unset m if an earlier statement threw - confirm whether m is
+// initialised elsewhere.
+Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string f, float cs) :
+rabund(rav), list(lv), dMatrix(dm), method(f), adjust(cs)
{
-}
-
-/***********************************************************************/
-
-void Cluster::getRowColCells(){
try {
- PCell* smallCell = dMatrix->getSmallestCell(); //find the smallest cell - this routine should probably not be in the SpMat class
-
- smallRow = smallCell->row; //get its row
- smallCol = smallCell->column; //get its column
- smallDist = smallCell->dist; //get the smallest distance
-
- rowCells.clear();
- colCells.clear();
-
- for(MatData currentCell=dMatrix->begin();currentCell!=dMatrix->end();currentCell++){
-
- if(&*currentCell == smallCell){ //put the smallest cell first
- rowCells.insert(rowCells.begin(), currentCell);
- colCells.insert(colCells.begin(), currentCell);
- }
- else if(currentCell->row == smallRow){
- rowCells.push_back(currentCell);
- }
- else if(currentCell->column == smallRow){
- rowCells.push_back(currentCell);
- }
- else if(currentCell->row == smallCol){
- colCells.push_back(currentCell);
- }
- else if(currentCell->column == smallCol){
- colCells.push_back(currentCell);
- }
- }
-
- nRowCells = rowCells.size();
- nColCells = colCells.size();
+
+ mapWanted = false; //set to true by mgcluster to speed up overlap merge
+
+ //save so you can modify as it changes in average neighbor
+ cutoff = c;
+ m = MothurOut::getInstance();
}
catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the Cluster class Function getRowColCells. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ m->errorOut(e, "Cluster", "Cluster");
exit(1);
}
- catch(...) {
- cout << "An unknown error has occurred in the Cluster class function getRowColCells. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
}
-
/***********************************************************************/
-
+// Folds the abundance of bin smallRow into bin smallCol: smallCol receives the sum
+// of both counts, smallRow is set to 0, and the rabund label becomes the string
+// form of smallDist.
+// Reads the members smallRow, smallCol and smallDist, which are set outside this
+// function. On std::exception it reports through m->errorOut and calls exit(1).
void Cluster::clusterBins(){
try {
- // cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol);
-
- rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol));
+ rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol));
rabund->set(smallRow, 0);
rabund->setLabel(toString(smallDist));
-
- // cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl;
}
catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the Cluster class Function clusterBins. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ m->errorOut(e, "Cluster", "clusterBins");
exit(1);
}
- catch(...) {
- cout << "An unknown error has occurred in the Cluster class function clusterBins. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
-
-
}
-
/***********************************************************************/
+// Folds the names of bin smallRow into bin smallCol: smallCol becomes
+// "<smallRow names>,<smallCol names>", smallRow becomes the empty string, and the
+// list label becomes the string form of smallDist.
+// Reads the members smallRow, smallCol and smallDist, which are set outside this
+// function. On std::exception it reports through m->errorOut and calls exit(1).
void Cluster::clusterNames(){
try {
- // cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(smallRow) << '\t' << list->get(smallCol);
-
+ // updateMap() reads list->get(smallRow), so it must run before that bin is emptied below
+ if (mapWanted) { updateMap(); }
+
list->set(smallCol, list->get(smallRow)+','+list->get(smallCol));
list->set(smallRow, "");
list->setLabel(toString(smallDist));
-
- // cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl;
}
catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the Cluster class Function clusterNames. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ m->errorOut(e, "Cluster", "clusterNames");
+ exit(1);
+ }
+}
+/***********************************************************************/
+// Performs one merge step on the pair returned by dMatrix->getSmallestCell():
+//   1. walks smallRow's cells from last to first, merging each one into the
+//      smallCol cell with the same index (via updateDistance) and then removing
+//      it from smallRow;
+//   2. calls clusterBins() and clusterNames() to merge abundances and names;
+//   3. walks smallCol's cells and removes every cell that was not matched in step 1.
+// cutOFF is an in/out parameter: for method "average" or "weighted" it is lowered
+// to the distance of any cell that could not be merged, when that distance is
+// smaller than the current value.
+// When adjust != -1.0, a missing counterpart distance is filled in with adjust
+// instead of being left unmerged.
+// On std::exception it reports through m->errorOut and calls exit(1).
+// NOTE(review): `changed` receives the result of updateDistance but is never read
+// in this function.
+void Cluster::update(double& cutOFF){
+ try {
+ // smallRow is passed as an argument; presumably getSmallestCell fills it in by reference - confirm
+ smallCol = dMatrix->getSmallestCell(smallRow);
+ nColCells = dMatrix->seqVec[smallCol].size();
+ nRowCells = dMatrix->seqVec[smallRow].size();
+
+ // foundCol[j] == 1 marks a smallCol cell that was matched with a smallRow cell
+ vector<int> foundCol(nColCells, 0);
+ //cout << dMatrix->getNNodes() << " small cell: " << smallRow << '\t' << smallCol << endl;
+ int search;
+ bool changed;
+
+ // iterate backwards because rmCell(smallRow, i) is called inside the loop
+ for (int i=nRowCells-1;i>=0;i--) {
+ // stop early when the user has interrupted the run
+ if (m->control_pressed) { break; }
+
+ //if you are not the smallCell
+ if (dMatrix->seqVec[smallRow][i].index != smallCol) {
+ search = dMatrix->seqVec[smallRow][i].index;
+
+ bool merged = false;
+ for (int j=0;j<nColCells;j++) {
+
+ if (dMatrix->seqVec[smallCol][j].index != smallRow) { //if you are not the smallest distance
+ if (dMatrix->seqVec[smallCol][j].index == search) {
+ foundCol[j] = 1;
+ merged = true;
+ changed = updateDistance(dMatrix->seqVec[smallCol][j], dMatrix->seqVec[smallRow][i]);
+ dMatrix->updateCellCompliment(smallCol, j);
+ break;
+ // presumably the cells are ordered by descending index, so passing below `search` means no match exists - confirm
+ }else if (dMatrix->seqVec[smallCol][j].index < search) { //we don't have a distance for this cell
+ if (adjust != -1.0) { //adjust
+ merged = true;
+ PDistCell value(search, adjust); //create a distance for the missing value
+ int location = dMatrix->addCellSorted(smallCol, value);
+ changed = updateDistance(dMatrix->seqVec[smallCol][location], dMatrix->seqVec[smallRow][i]);
+ dMatrix->updateCellCompliment(smallCol, location);
+ nColCells++;
+ foundCol.push_back(0); //add a new found column
+ //adjust value
+ // shift the flags right by one from `location` so they stay aligned with the cells after the insert
+ for (int k = foundCol.size()-1; k > location; k--) { foundCol[k] = foundCol[k-1]; }
+ foundCol[location] = 1;
+ }
+ // pushes j past nColCells so the inner loop ends after this iteration
+ j+=nColCells;
+ }
+ }
+ }
+ //if the cell was not merged, its distance is needed to lower the cutoff
+ if ((!merged) && (method == "average" || method == "weighted")) {
+ if (cutOFF > dMatrix->seqVec[smallRow][i].dist) {
+ cutOFF = dMatrix->seqVec[smallRow][i].dist;
+ //cout << "changing cutoff to " << cutOFF << endl;
+ }
+
+ }
+ dMatrix->rmCell(smallRow, i);
+ }
+ }
+ clusterBins();
+ clusterNames();
+
+ // Special handling for singlelinkage case, not sure whether this
+ // could be avoided
+ // every smallCol cell left unmatched above is removed here, after an optional adjust/cutoff update
+ for (int i=nColCells-1;i>=0;i--) {
+ if (foundCol[i] == 0) {
+ if (adjust != -1.0) { //adjust
+ PDistCell value(smallCol, adjust); //create a distance for the missing value
+ changed = updateDistance(dMatrix->seqVec[smallCol][i], value);
+ dMatrix->updateCellCompliment(smallCol, i);
+ }else {
+ if (method == "average" || method == "weighted") {
+ if (dMatrix->seqVec[smallCol][i].index != smallRow) { //if you are not the smallest distance
+ if (cutOFF > dMatrix->seqVec[smallCol][i].dist) {
+ cutOFF = dMatrix->seqVec[smallCol][i].dist;
+ }
+ }
+ }
+ }
+ dMatrix->rmCell(smallCol, i);
+ }
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Cluster", "update");
exit(1);
}
- catch(...) {
- cout << "An unknown error has occurred in the Cluster class function clusterNames. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+}
+/***********************************************************************/
+// Stores f in mapWanted and fills seq2Bin so that every comma-separated name in
+// every bin of list maps to that bin's index.
+// The map is filled regardless of the value of f, and existing entries are not
+// cleared first.
+// On std::exception it reports through m->errorOut and calls exit(1).
+// NOTE(review): for an empty bin the "get last name" step still runs, so the key ""
+// is written to seq2Bin - confirm this is harmless for callers.
+void Cluster::setMapWanted(bool f) {
+ try {
+ mapWanted = f;
+
+ //initialize map
+ for (int k = 0; k < list->getNumBins(); k++) {
+
+ string names = list->get(k);
+
+ //parse bin
+ // split on ',' one character at a time; each completed name is mapped to bin k
+ string individual = "";
+ int binNameslength = names.size();
+ for(int j=0;j<binNameslength;j++){
+ if(names[j] == ','){
+ seq2Bin[individual] = k;
+ individual = "";
+ }
+ else{ individual += names[j]; }
+ }
+ //get last name
+ seq2Bin[individual] = k;
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Cluster", "setMapWanted");
exit(1);
- }
+ }
+}
+/***********************************************************************/
+// Re-points every comma-separated name currently in bin smallRow to smallCol in
+// seq2Bin. It reads list->get(smallRow), so it only sees those names while that
+// bin still holds them.
+// On std::exception it reports through m->errorOut and calls exit(1).
+// NOTE(review): if bin smallRow is empty, the key "" is written to seq2Bin -
+// confirm this is harmless for callers.
+void Cluster::updateMap() {
+ try {
+ //update location of seqs in smallRow since they move to smallCol now
+ string names = list->get(smallRow);
+
+ // split on ',' one character at a time; each completed name is mapped to smallCol
+ string individual = "";
+ int binNameslength = names.size();
+ for(int j=0;j<binNameslength;j++){
+ if(names[j] == ','){
+ seq2Bin[individual] = smallCol;
+ individual = "";
+ }
+ else{ individual += names[j]; }
+ }
+ //get last name
+ seq2Bin[individual] = smallCol;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Cluster", "updateMap");
+ exit(1);
+ }
}
-
/***********************************************************************/
+
+