]> git.donarmstrong.com Git - mothur.git/blobdiff - cluster.cpp
changes while testing
[mothur.git] / cluster.cpp
index 4009ed075e63c78e4fb661352c675cff3209e030..0a70fbfee99957de6f08315562f9f0d9ac14fee0 100644 (file)
 #include "cluster.hpp"
 #include "rabundvector.hpp"
 #include "listvector.hpp"
-#include "sparsematrix.hpp"
 
 /***********************************************************************/
 
-Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c) :
-rabund(rav), list(lv), dMatrix(dm)
+Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string f) :
+rabund(rav), list(lv), dMatrix(dm), method(f)
 {
-/*
-       cout << "sizeof(MatData): " << sizeof(MatData) << endl;
-       cout << "sizeof(PCell*): " << sizeof(PCell*) << endl;
-
-       int nCells = dMatrix->getNNodes();
-       time_t start = time(NULL);
-
-       MatVec matvec = MatVec(nCells); 
-       int i = 0;
-       for (MatData currentCell = dMatrix->begin(); currentCell != dMatrix->end(); currentCell++) {
-               matvec[i++] = currentCell;
-       }
-       for (i= matvec.size();i>0;i--) {
-               dMatrix->rmCell(matvec[i-1]);
-       }
-       MatData it = dMatrix->begin(); 
-       while (it != dMatrix->end()) { 
-               it = dMatrix->rmCell(it);
-       }
-       cout << "Time to remove " << nCells << " cells: " << time(NULL) - start << " seconds" << endl;
-    exit(0);
-       MatData it = dMatrix->begin();
-       cout << it->row << "/" << it->column << "/" << it->dist << endl;
-       dMatrix->rmCell(dMatrix->begin());
-       cout << it->row << "/" << it->column << "/" << it->dist << endl;
-       exit(0);
-*/
-
-       // Create a data structure to quickly access the PCell information
-       // for a certain sequence. It consists of a vector of lists, where 
-       // a list contains pointers (iterators) to the all distances related
-       // to a certain sequence. The Vector is accessed via the index of a 
-       // sequence in the distance matrix.
-       seqVec = vector<MatVec>(lv->size());
-       for (MatData currentCell = dMatrix->begin(); currentCell != dMatrix->end(); currentCell++) {
-               seqVec[currentCell->row].push_back(currentCell);
-               seqVec[currentCell->column].push_back(currentCell);
-       }
-       mapWanted = false;  //set to true by mgcluster to speed up overlap merge
-       
-       //save so you can modify as it changes in average neighbor
-       cutoff = c;
-}
-
-/***********************************************************************/
-
-void Cluster::getRowColCells() {
        try {
-               PCell* smallCell = dMatrix->getSmallestCell();  //find the smallest cell - this routine should probably not be in the SpMat class
-       
-               smallRow = smallCell->row;              // get its row
-               smallCol = smallCell->column;   // get its column
-               smallDist = smallCell->dist;    // get the smallest distance
-       
-               rowCells = seqVec[smallRow];    // all distances related to the row index
-               colCells = seqVec[smallCol];    // all distances related to the column index
-               nRowCells = rowCells.size();
-               nColCells = colCells.size();
+        
+        mapWanted = false;  //set to true by mgcluster to speed up overlap merge
+        
+        //save so you can modify as it changes in average neighbor
+        cutoff = c;
+        m = MothurOut::getInstance();
        }
        catch(exception& e) {
-               errorOut(e, "Cluster", "getRowColCells");
+               m->errorOut(e, "Cluster", "Cluster");
                exit(1);
        }
-
-}
-/***********************************************************************/
-// Remove the specified cell from the seqVec and from the sparse
-// matrix
-void Cluster::removeCell(const MatData& cell, int vrow, int vcol, bool rmMatrix)
-{
-       ull drow = cell->row;
-       ull dcol = cell->column;
-       if (((vrow >=0) && (drow != smallRow)) ||
-               ((vcol >=0) && (dcol != smallCol))) {
-               ull dtemp = drow;
-               drow = dcol;
-               dcol = dtemp;
-       }
-
-       ull crow;
-       ull ccol;
-       int nCells;
-       if (vrow < 0) {
-               nCells = seqVec[drow].size();
-               for (vrow=0; vrow<nCells;vrow++) {
-                       crow = seqVec[drow][vrow]->row;
-                       ccol = seqVec[drow][vrow]->column;
-                       if (((crow == drow) && (ccol == dcol)) ||
-                               ((ccol == drow) && (crow == dcol))) {
-                               break;
-                       }
-               }
-       }
-       seqVec[drow].erase(seqVec[drow].begin()+vrow);
-       if (vcol < 0) {
-               nCells = seqVec[dcol].size();
-               for (vcol=0; vcol<nCells;vcol++) {
-                       crow = seqVec[dcol][vcol]->row;
-                       ccol = seqVec[dcol][vcol]->column;
-                       if (((crow == drow) && (ccol == dcol)) ||
-                               ((ccol == drow) && (crow == dcol))) {
-                               break;
-                       }
-               }
-       }
-       seqVec[dcol].erase(seqVec[dcol].begin()+vcol);
-       if (rmMatrix) {
-               dMatrix->rmCell(cell);
-       }
 }
-
-
 /***********************************************************************/
-
 void Cluster::clusterBins(){
        try {
-       //      cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol);
-
-               rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol));     
+               rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol));     
                rabund->set(smallRow, 0);       
                rabund->setLabel(toString(smallDist));
-
-       //      cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl;
        }
        catch(exception& e) {
-               errorOut(e, "Cluster", "clusterBins");
+               m->errorOut(e, "Cluster", "clusterBins");
                exit(1);
        }
-
-
 }
-
 /***********************************************************************/
 
 void Cluster::clusterNames(){
        try {
-       //      cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(smallRow) << '\t' << list->get(smallCol);
                if (mapWanted) {  updateMap();  }
                
                list->set(smallCol, list->get(smallRow)+','+list->get(smallCol));
                list->set(smallRow, "");        
                list->setLabel(toString(smallDist));
-       
-       //      cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl;
     }
        catch(exception& e) {
-               errorOut(e, "Cluster", "clusterNames");
+               m->errorOut(e, "Cluster", "clusterNames");
                exit(1);
        }
-
 }
-
 /***********************************************************************/
-//This function clusters based on the method of the derived class
-//At the moment only average and complete linkage are covered, because
-//single linkage uses a different approach.
 void Cluster::update(double& cutOFF){
        try {
-               getRowColCells();       
-       
+        smallCol = dMatrix->getSmallestCell(smallRow);
+        nColCells = dMatrix->seqVec[smallCol].size();
+        nRowCells = dMatrix->seqVec[smallRow].size();
+        
                vector<int> foundCol(nColCells, 0);
-
+        //cout << dMatrix->getNNodes() << " small cell: " << smallRow << '\t' << smallCol << endl;
                int search;
                bool changed;
-
-               // The vector has to be traversed in reverse order to preserve the index
-               // for faster removal in removeCell()
+        
                for (int i=nRowCells-1;i>=0;i--) {
-                       if (!((rowCells[i]->row == smallRow) && (rowCells[i]->column == smallCol))) {
-                               if (rowCells[i]->row == smallRow) {
-                                       search = rowCells[i]->column;
-                               } else {
-                                       search = rowCells[i]->row;
-                               }
-                               
+            if (m->control_pressed) { break; }
+             
+                       //if you are not the smallCell
+                       if (dMatrix->seqVec[smallRow][i].index != smallCol) { 
+                search = dMatrix->seqVec[smallRow][i].index;
+                
                                bool merged = false;
                                for (int j=0;j<nColCells;j++) {
-                                       if (!((colCells[j]->row == smallRow) && (colCells[j]->column == smallCol))) { //if you are not hte smallest distance
-                                               if (colCells[j]->row == search || colCells[j]->column == search) {
+                    
+                                       if (dMatrix->seqVec[smallCol][j].index != smallRow) {  //if you are not the smallest distance
+                                               if (dMatrix->seqVec[smallCol][j].index == search) {
                                                        foundCol[j] = 1;
                                                        merged = true;
-                                                       changed = updateDistance(colCells[j], rowCells[i]);
-                                                       // If the cell's distance changed and it had the same distance as 
-                                                       // the smallest distance, invalidate the mins vector in SparseMatrix
-                                                       if (changed) {
-                                                               if (colCells[j]->vectorMap != NULL) {
-                                                                       *(colCells[j]->vectorMap) = NULL;
-                                                                       colCells[j]->vectorMap = NULL;
-                                                               }
-                                                       }
+                                                       changed = updateDistance(dMatrix->seqVec[smallCol][j], dMatrix->seqVec[smallRow][i]);
+                            dMatrix->updateCellCompliment(smallCol, j);
                                                        break;
-                                               }
-                                       }               
+                                               }else if (dMatrix->seqVec[smallCol][j].index < search) { j+=nColCells; } //we don't have a distance for this cell 
+                                       }       
                                }
                                //if not merged it you need it for warning 
-                               if (!merged) {  
-                                       mothurOut("Warning: trying to merge cell " + toString(rowCells[i]->row+1) + " " + toString(rowCells[i]->column+1) + " distance " + toString(rowCells[i]->dist) + " with value above cutoff. Results may vary from using cutoff at cluster command instead of read.dist."); mothurOutEndLine(); 
-                                       if (cutOFF > rowCells[i]->dist) {  cutOFF = rowCells[i]->dist;  mothurOut("changing cutoff to " + toString(cutOFF));  mothurOutEndLine(); }
-
+                               if ((!merged) && (method == "average" || method == "weighted")) {  
+                                       if (cutOFF > dMatrix->seqVec[smallRow][i].dist) {  
+                                               cutOFF = dMatrix->seqVec[smallRow][i].dist;
+                        //cout << "changing cutoff to " << cutOFF << endl;
+                                       }
+                    
                                }
-                               removeCell(rowCells[i], i , -1);  
-                               
+                               dMatrix->rmCell(smallRow, i);
                        }
                }
                clusterBins();
                clusterNames();
-
+        
                // Special handling for singlelinkage case, not sure whether this
                // could be avoided
                for (int i=nColCells-1;i>=0;i--) {
-                       if (foundCol[i] == 0) {
-                               if (!((colCells[i]->row == smallRow) && (colCells[i]->column == smallCol))) {
-                                       mothurOut("Warning: merging cell " + toString(colCells[i]->row+1) + " " + toString(colCells[i]->column+1) + " distance " + toString(colCells[i]->dist) + " value above cutoff. Results may vary from using cutoff at cluster command instead of read.dist."); mothurOutEndLine();
-                                       if (cutOFF > colCells[i]->dist) {  cutOFF = colCells[i]->dist;  mothurOut("changing cutoff to " + toString(cutOFF));  mothurOutEndLine(); }
+                       if (foundCol[i] == 0) { 
+                               if (method == "average" || method == "weighted") {
+                                       if (dMatrix->seqVec[smallCol][i].index != smallRow) { //if you are not hte smallest distance 
+                                               if (cutOFF > dMatrix->seqVec[smallCol][i].dist) {  
+                                                       cutOFF = dMatrix->seqVec[smallCol][i].dist;  
+                                               }
+                                       }
                                }
-                               removeCell(colCells[i], -1, i);
+                dMatrix->rmCell(smallCol, i);
                        }
                }
+        
        }
        catch(exception& e) {
-               errorOut(e, "Cluster", "update");
+               m->errorOut(e, "Cluster", "update");
                exit(1);
        }
 }
 /***********************************************************************/
-void Cluster::setMapWanted(bool m)  {  
+void Cluster::setMapWanted(bool f)  {  
        try {
-               mapWanted = m;
+               mapWanted = f;
                
-               //initialize map
-               for (int i = 0; i < list->getNumBins(); i++) {
-                       
-                       //parse bin 
-                       string names = list->get(i);
-                       while (names.find_first_of(',') != -1) { 
-                               //get name from bin
-                               string name = names.substr(0,names.find_first_of(','));
-                               //save name and bin number
-                               seq2Bin[name] = i;
-                               names = names.substr(names.find_first_of(',')+1, names.length());
-                       }
-                       
-                       //get last name
-                       seq2Bin[names] = i;
+        //initialize map
+               for (int k = 0; k < list->getNumBins(); k++) {
+            
+            string names = list->get(k);
+            
+            //parse bin
+            string individual = "";
+            int binNameslength = names.size();
+            for(int j=0;j<binNameslength;j++){
+                if(names[j] == ','){
+                    seq2Bin[individual] = k;
+                    individual = "";                           
+                }
+                else{  individual += names[j];  }
+            }
+            //get last name
+            seq2Bin[individual] = k;
                }
                
        }
        catch(exception& e) {
-               errorOut(e, "Cluster", "setMapWanted");
+               m->errorOut(e, "Cluster", "setMapWanted");
                exit(1);
        }
 }
 /***********************************************************************/
 void Cluster::updateMap() {
-try {
+    try {
                //update location of seqs in smallRow since they move to smallCol now
                string names = list->get(smallRow);
-               while (names.find_first_of(',') != -1) { 
-                       //get name from bin
-                       string name = names.substr(0,names.find_first_of(','));
-                       //save name and bin number
-                       seq2Bin[name] = smallCol;
-                       names = names.substr(names.find_first_of(',')+1, names.length());
-               }
-                       
-               //get last name
-               seq2Bin[names] = smallCol;
                
+        string individual = "";
+        int binNameslength = names.size();
+        for(int j=0;j<binNameslength;j++){
+            if(names[j] == ','){
+                seq2Bin[individual] = smallCol;
+                individual = "";                               
+            }
+            else{  individual += names[j];  }
+        }
+        //get last name
+        seq2Bin[individual] = smallCol;                
+       
        }
        catch(exception& e) {
-               errorOut(e, "Cluster", "updateMap");
+               m->errorOut(e, "Cluster", "updateMap");
                exit(1);
        }
 }