X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=cluster.cpp;h=6b69e4d5312fcce5db6c084fd42499b64724fb5e;hp=1ccb34943d32b907b656fd6fc6272a11501f549f;hb=b206f634aae1b4ce13978d203247fb64757d5482;hpb=c5c7502f435e1413c19e373dab1dfebcaa67588d diff --git a/cluster.cpp b/cluster.cpp index 1ccb349..6b69e4d 100644 --- a/cluster.cpp +++ b/cluster.cpp @@ -10,106 +10,194 @@ #include "cluster.hpp" #include "rabundvector.hpp" #include "listvector.hpp" -#include "sparsematrix.hpp" /***********************************************************************/ -Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseMatrix* dm) : -rabund(rav), list(lv), dMatrix(dm) +Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string f, float cs) : +rabund(rav), list(lv), dMatrix(dm), method(f), adjust(cs) { -} - -/***********************************************************************/ - -void Cluster::getRowColCells(){ try { - PCell* smallCell = dMatrix->getSmallestCell(); //find the smallest cell - this routine should probably not be in the SpMat class - - smallRow = smallCell->row; //get its row - smallCol = smallCell->column; //get its column - smallDist = smallCell->dist; //get the smallest distance - - rowCells.clear(); - colCells.clear(); - - for(MatData currentCell=dMatrix->begin();currentCell!=dMatrix->end();currentCell++){ - - if(&*currentCell == smallCell){ //put the smallest cell first - rowCells.insert(rowCells.begin(), currentCell); - colCells.insert(colCells.begin(), currentCell); - } - else if(currentCell->row == smallRow){ - rowCells.push_back(currentCell); - } - else if(currentCell->column == smallRow){ - rowCells.push_back(currentCell); - } - else if(currentCell->row == smallCol){ - colCells.push_back(currentCell); - } - else if(currentCell->column == smallCol){ - colCells.push_back(currentCell); - } - } - - nRowCells = rowCells.size(); - nColCells = colCells.size(); + + mapWanted = false; //set to true by mgcluster to speed up overlap merge + + //save so you can modify as it changes in average neighbor + cutoff = c; + m = MothurOut::getInstance(); } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the Cluster class Function getRowColCells. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "Cluster", "Cluster"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the Cluster class function getRowColCells. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } } - /***********************************************************************/ - void Cluster::clusterBins(){ try { - // cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol); - - rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol)); + rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol)); rabund->set(smallRow, 0); rabund->setLabel(toString(smallDist)); - - // cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the Cluster class Function clusterBins. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "Cluster", "clusterBins"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the Cluster class function clusterBins. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - - } - /***********************************************************************/ void Cluster::clusterNames(){ try { - // cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(smallRow) << '\t' << list->get(smallCol); - + if (mapWanted) { updateMap(); } + list->set(smallCol, list->get(smallRow)+','+list->get(smallCol)); list->set(smallRow, ""); list->setLabel(toString(smallDist)); - - // cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the Cluster class Function clusterNames. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "Cluster", "clusterNames"); + exit(1); + } +} +/***********************************************************************/ +void Cluster::update(double& cutOFF){ + try { + smallCol = dMatrix->getSmallestCell(smallRow); + nColCells = dMatrix->seqVec[smallCol].size(); + nRowCells = dMatrix->seqVec[smallRow].size(); + + vector foundCol(nColCells, 0); + //cout << dMatrix->getNNodes() << " small cell: " << smallRow << '\t' << smallCol << endl; + int search; + bool changed; + + for (int i=nRowCells-1;i>=0;i--) { + if (m->control_pressed) { break; } + + //if you are not the smallCell + if (dMatrix->seqVec[smallRow][i].index != smallCol) { + search = dMatrix->seqVec[smallRow][i].index; + + bool merged = false; + for (int j=0;jseqVec[smallCol][j].index != smallRow) { //if you are not the smallest distance + if (dMatrix->seqVec[smallCol][j].index == search) { + foundCol[j] = 1; + merged = true; + changed = updateDistance(dMatrix->seqVec[smallCol][j], dMatrix->seqVec[smallRow][i]); + dMatrix->updateCellCompliment(smallCol, j); + break; + }else if (dMatrix->seqVec[smallCol][j].index < search) { //we don't have a distance for this cell + if (adjust != -1.0) { //adjust + merged = true; + PDistCell value(search, adjust); //create a distance for the missing value + int location = dMatrix->addCellSorted(smallCol, value); + changed = updateDistance(dMatrix->seqVec[smallCol][location], dMatrix->seqVec[smallRow][i]); + dMatrix->updateCellCompliment(smallCol, location); + nColCells++; + foundCol.push_back(0); //add a new found column + //adjust value + for (int k = foundCol.size()-1; k > location; k--) { foundCol[k] = foundCol[k-1]; } + foundCol[location] = 1; + } + j+=nColCells; + } + } + } + //if not merged it you need it for warning + if ((!merged) && (method == "average" || method == "weighted")) { + if (cutOFF > dMatrix->seqVec[smallRow][i].dist) { + cutOFF = dMatrix->seqVec[smallRow][i].dist; + //cout << "changing cutoff to " << cutOFF << endl; + } + + } + dMatrix->rmCell(smallRow, i); + } + } + clusterBins(); + clusterNames(); + + // Special handling for singlelinkage case, not sure whether this + // could be avoided + for (int i=nColCells-1;i>=0;i--) { + if (foundCol[i] == 0) { + if (adjust != -1.0) { //adjust + PDistCell value(smallCol, adjust); //create a distance for the missing value + changed = updateDistance(dMatrix->seqVec[smallCol][i], value); + dMatrix->updateCellCompliment(smallCol, i); + }else { + if (method == "average" || method == "weighted") { + if (dMatrix->seqVec[smallCol][i].index != smallRow) { //if you are not hte smallest distance + if (cutOFF > dMatrix->seqVec[smallCol][i].dist) { + cutOFF = dMatrix->seqVec[smallCol][i].dist; + } + } + } + } + dMatrix->rmCell(smallCol, i); + } + } + + } + catch(exception& e) { + m->errorOut(e, "Cluster", "update"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the Cluster class function clusterNames. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; +} +/***********************************************************************/ +void Cluster::setMapWanted(bool f) { + try { + mapWanted = f; + + //initialize map + for (int k = 0; k < list->getNumBins(); k++) { + + string names = list->get(k); + + //parse bin + string individual = ""; + int binNameslength = names.size(); + for(int j=0;jerrorOut(e, "Cluster", "setMapWanted"); exit(1); - } + } +} +/***********************************************************************/ +void Cluster::updateMap() { + try { + //update location of seqs in smallRow since they move to smallCol now + string names = list->get(smallRow); + + string individual = ""; + int binNameslength = names.size(); + for(int j=0;jerrorOut(e, "Cluster", "updateMap"); + exit(1); + } } - /***********************************************************************/ + +