]> git.donarmstrong.com Git - mothur.git/blobdiff - cluster.cpp
modified mpi code to save ram by writing out every 10 seqs.
[mothur.git] / cluster.cpp
index 00d8083275779399748d690b3a9fe84910d91104..bd5986e7d7a2d0bdfe759088c8306e206a4ee9ad 100644 (file)
@@ -14,8 +14,8 @@
 
 /***********************************************************************/
 
-Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c) :
-rabund(rav), list(lv), dMatrix(dm), cutoff(c)
+Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c, string f) :
+rabund(rav), list(lv), dMatrix(dm), method(f)
 {
 /*
        cout << "sizeof(MatData): " << sizeof(MatData) << endl;
@@ -56,6 +56,10 @@ rabund(rav), list(lv), dMatrix(dm), cutoff(c)
                seqVec[currentCell->column].push_back(currentCell);
        }
        mapWanted = false;  //set to true by mgcluster to speed up overlap merge
+       
+       //save so you can modify as it changes in average neighbor
+       cutoff = c;
+       m = MothurOut::getInstance();
 }
 
 /***********************************************************************/
@@ -74,7 +78,7 @@ void Cluster::getRowColCells() {
                nColCells = colCells.size();
        }
        catch(exception& e) {
-               errorOut(e, "Cluster", "getRowColCells");
+               m->errorOut(e, "Cluster", "getRowColCells");
                exit(1);
        }
 
@@ -139,7 +143,7 @@ void Cluster::clusterBins(){
        //      cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl;
        }
        catch(exception& e) {
-               errorOut(e, "Cluster", "clusterBins");
+               m->errorOut(e, "Cluster", "clusterBins");
                exit(1);
        }
 
@@ -160,7 +164,7 @@ void Cluster::clusterNames(){
        //      cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl;
     }
        catch(exception& e) {
-               errorOut(e, "Cluster", "clusterNames");
+               m->errorOut(e, "Cluster", "clusterNames");
                exit(1);
        }
 
@@ -170,11 +174,12 @@ void Cluster::clusterNames(){
 //This function clusters based on the method of the derived class
 //At the moment only average and complete linkage are covered, because
 //single linkage uses a different approach.
-void Cluster::update(){
+void Cluster::update(double& cutOFF){
        try {
                getRowColCells();       
        
-               vector<int> found(nColCells, 0);
+               vector<int> foundCol(nColCells, 0);
+
                int search;
                bool changed;
 
@@ -187,11 +192,13 @@ void Cluster::update(){
                                } else {
                                        search = rowCells[i]->row;
                                }
-               
+                               
+                               bool merged = false;
                                for (int j=0;j<nColCells;j++) {
-                                       if (!((colCells[j]->row == smallRow) && (colCells[j]->column == smallCol))) {
+                                       if (!((colCells[j]->row == smallRow) && (colCells[j]->column == smallCol))) { //if you are not the smallest distance
                                                if (colCells[j]->row == search || colCells[j]->column == search) {
-                                                       found[j] = 1;
+                                                       foundCol[j] = 1;
+                                                       merged = true;
                                                        changed = updateDistance(colCells[j], rowCells[i]);
                                                        // If the cell's distance changed and it had the same distance as 
                                                        // the smallest distance, invalidate the mins vector in SparseMatrix
@@ -203,9 +210,19 @@ void Cluster::update(){
                                                        }
                                                        break;
                                                }
+                                       }               
+                               }
                                //if not merged, you need it for the warning
+                               if ((!merged) && (method == "average")) {  
+                                       //m->mothurOut("Warning: trying to merge cell " + toString(rowCells[i]->row+1) + " " + toString(rowCells[i]->column+1) + " distance " + toString(rowCells[i]->dist) + " with value above cutoff. Results may vary from using cutoff at cluster command instead of read.dist."); m->mothurOutEndLine(); 
+                                       if (cutOFF > rowCells[i]->dist) {  
+                                               cutOFF = rowCells[i]->dist;  
+                                               //m->mothurOut("changing cutoff to " + toString(cutOFF));  m->mothurOutEndLine(); 
                                        }
+
                                }
-                               removeCell(rowCells[i], i , -1);
+                               removeCell(rowCells[i], i , -1);  
+                               
                        }
                }
                clusterBins();
@@ -214,20 +231,29 @@ void Cluster::update(){
                // Special handling for singlelinkage case, not sure whether this
                // could be avoided
                for (int i=nColCells-1;i>=0;i--) {
-                       if (found[i] == 0) {
+                       if (foundCol[i] == 0) {
+                               if (method == "average") {
+                                       if (!((colCells[i]->row == smallRow) && (colCells[i]->column == smallCol))) {
+                                               //m->mothurOut("Warning: merging cell " + toString(colCells[i]->row+1) + " " + toString(colCells[i]->column+1) + " distance " + toString(colCells[i]->dist) + " value above cutoff. Results may vary from using cutoff at cluster command instead of read.dist."); m->mothurOutEndLine();
+                                               if (cutOFF > colCells[i]->dist) {  
+                                                       cutOFF = colCells[i]->dist;  
+                                                       //m->mothurOut("changing cutoff to " + toString(cutOFF));  m->mothurOutEndLine(); 
+                                               }
+                                       }
+                               }
                                removeCell(colCells[i], -1, i);
                        }
                }
        }
        catch(exception& e) {
-               errorOut(e, "Cluster", "update");
+               m->errorOut(e, "Cluster", "update");
                exit(1);
        }
 }
 /***********************************************************************/
-void Cluster::setMapWanted(bool m)  {  
+void Cluster::setMapWanted(bool f)  {  
        try {
-               mapWanted = m;
+               mapWanted = f;
                
                //initialize map
                for (int i = 0; i < list->getNumBins(); i++) {
@@ -248,7 +274,7 @@ void Cluster::setMapWanted(bool m)  {
                
        }
        catch(exception& e) {
-               errorOut(e, "Cluster", "setMapWanted");
+               m->errorOut(e, "Cluster", "setMapWanted");
                exit(1);
        }
 }
@@ -270,7 +296,7 @@ try {
                
        }
        catch(exception& e) {
-               errorOut(e, "Cluster", "updateMap");
+               m->errorOut(e, "Cluster", "updateMap");
                exit(1);
        }
 }