git.donarmstrong.com Git - mothur.git/blobdiff - cluster.cpp
changes while testing
[mothur.git] / cluster.cpp
index 5bd009989aec5b42f6be23c00eef35fc51157d66..0a70fbfee99957de6f08315562f9f0d9ac14fee0 100644 (file)
 #include "cluster.hpp"
 #include "rabundvector.hpp"
 #include "listvector.hpp"
-#include <exception>
 
 /***********************************************************************/
 
-Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseMatrix* dm) :
-rabund(rav), list(lv), dMatrix(dm)
+Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string f) :
+rabund(rav), list(lv), dMatrix(dm), method(f)
 {
-}
-
-/***********************************************************************/
-
-void Cluster::getRowColCells(){
        try {
-               PCell* smallCell = dMatrix->getSmallestCell();  //find the smallest cell - this routine should probably not be in the SpMat class
-       
-               smallRow = smallCell->row;              //get its row
-               smallCol = smallCell->column;   //get its column
-               smallDist = smallCell->dist;    //get the smallest distance
-       
-               rowCells.clear();
-               colCells.clear();
-               
-               for(MatData currentCell=dMatrix->begin();currentCell!=dMatrix->end();currentCell++){
-               
-                       if(&*currentCell == smallCell){                         //put the smallest cell first
-                               rowCells.insert(rowCells.begin(), currentCell);
-                               colCells.insert(colCells.begin(), currentCell);
-                       }
-                       else if(currentCell->row == smallRow){
-                               rowCells.push_back(currentCell);
-                       }
-                       else if(currentCell->column == smallRow){
-                               rowCells.push_back(currentCell);
-                       }
-                       else if(currentCell->row == smallCol){
-                               colCells.push_back(currentCell);
-                       }
-                       else if(currentCell->column == smallCol){
-                               colCells.push_back(currentCell);
-                       }
-               }
-       
-               nRowCells = rowCells.size();
-               nColCells = colCells.size();
+        
+        mapWanted = false;  //set to true by mgcluster to speed up overlap merge
+        
+        //save so you can modify as it changes in average neighbor
+        cutoff = c;
+        m = MothurOut::getInstance();
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the Cluster class Function getRowColCells. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "Cluster", "Cluster");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the Cluster class function getRowColCells. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }       
 }
-
 /***********************************************************************/
-
 void Cluster::clusterBins(){
        try {
-       //      cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol);
-
-               rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol));     
+               rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol));     
                rabund->set(smallRow, 0);       
                rabund->setLabel(toString(smallDist));
-
-       //      cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl;
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the Cluster class Function clusterBins. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "Cluster", "clusterBins");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the Cluster class function clusterBins. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }       
-
-
 }
-
 /***********************************************************************/
 
 void Cluster::clusterNames(){
        try {
-       //      cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(smallRow) << '\t' << list->get(smallCol);
-
+               if (mapWanted) {  updateMap();  }
+               
                list->set(smallCol, list->get(smallRow)+','+list->get(smallCol));
                list->set(smallRow, "");        
                list->setLabel(toString(smallDist));
-       
-       //      cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl;
     }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the Cluster class Function clusterNames. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "Cluster", "clusterNames");
+               exit(1);
+       }
+}
+/***********************************************************************/
+void Cluster::update(double& cutOFF){
+       try {
+        smallCol = dMatrix->getSmallestCell(smallRow);
+        nColCells = dMatrix->seqVec[smallCol].size();
+        nRowCells = dMatrix->seqVec[smallRow].size();
+        
+               vector<int> foundCol(nColCells, 0);
+        //cout << dMatrix->getNNodes() << " small cell: " << smallRow << '\t' << smallCol << endl;
+               int search;
+               bool changed;
+        
+               for (int i=nRowCells-1;i>=0;i--) {
+            if (m->control_pressed) { break; }
+             
+                       //if you are not the smallCell
+                       if (dMatrix->seqVec[smallRow][i].index != smallCol) { 
+                search = dMatrix->seqVec[smallRow][i].index;
+                
+                               bool merged = false;
+                               for (int j=0;j<nColCells;j++) {
+                    
+                                       if (dMatrix->seqVec[smallCol][j].index != smallRow) {  //if you are not the smallest distance
+                                               if (dMatrix->seqVec[smallCol][j].index == search) {
+                                                       foundCol[j] = 1;
+                                                       merged = true;
+                                                       changed = updateDistance(dMatrix->seqVec[smallCol][j], dMatrix->seqVec[smallRow][i]);
+                            dMatrix->updateCellCompliment(smallCol, j);
+                                                       break;
+                                               }else if (dMatrix->seqVec[smallCol][j].index < search) { j+=nColCells; } //we don't have a distance for this cell 
+                                       }       
+                               }
+                               //if it was not merged, lower the cutoff to this distance (average and weighted methods only)
+                               if ((!merged) && (method == "average" || method == "weighted")) {  
+                                       if (cutOFF > dMatrix->seqVec[smallRow][i].dist) {  
+                                               cutOFF = dMatrix->seqVec[smallRow][i].dist;
+                        //cout << "changing cutoff to " << cutOFF << endl;
+                                       }
+                    
+                               }
+                               dMatrix->rmCell(smallRow, i);
+                       }
+               }
+               clusterBins();
+               clusterNames();
+        
+               // Special handling for singlelinkage case, not sure whether this
+               // could be avoided
+               for (int i=nColCells-1;i>=0;i--) {
+                       if (foundCol[i] == 0) { 
+                               if (method == "average" || method == "weighted") {
+                                       if (dMatrix->seqVec[smallCol][i].index != smallRow) { //if you are not the smallest distance 
+                                               if (cutOFF > dMatrix->seqVec[smallCol][i].dist) {  
+                                                       cutOFF = dMatrix->seqVec[smallCol][i].dist;  
+                                               }
+                                       }
+                               }
+                dMatrix->rmCell(smallCol, i);
+                       }
+               }
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Cluster", "update");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the Cluster class function clusterNames. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+}
+/***********************************************************************/
+void Cluster::setMapWanted(bool f)  {  
+       try {
+               mapWanted = f;
+               
+        //initialize map
+               for (int k = 0; k < list->getNumBins(); k++) {
+            
+            string names = list->get(k);
+            
+            //parse bin
+            string individual = "";
+            int binNameslength = names.size();
+            for(int j=0;j<binNameslength;j++){
+                if(names[j] == ','){
+                    seq2Bin[individual] = k;
+                    individual = "";                           
+                }
+                else{  individual += names[j];  }
+            }
+            //get last name
+            seq2Bin[individual] = k;
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Cluster", "setMapWanted");
                exit(1);
-       }       
+       }
+}
+/***********************************************************************/
+void Cluster::updateMap() {
+    try {
+               //update location of seqs in smallRow since they move to smallCol now
+               string names = list->get(smallRow);
+               
+        string individual = "";
+        int binNameslength = names.size();
+        for(int j=0;j<binNameslength;j++){
+            if(names[j] == ','){
+                seq2Bin[individual] = smallCol;
+                individual = "";                               
+            }
+            else{  individual += names[j];  }
+        }
+        //get last name
+        seq2Bin[individual] = smallCol;                
+       
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Cluster", "updateMap");
+               exit(1);
+       }
 }
-
 /***********************************************************************/
 
+
+