]> git.donarmstrong.com Git - mothur.git/commitdiff
added sparseDistanceMatrix class. Modified cluster commands to use the new sparse...
authorSarah Westcott <mothur.westcott@gmail.com>
Fri, 27 Jul 2012 13:42:37 +0000 (09:42 -0400)
committerSarah Westcott <mothur.westcott@gmail.com>
Fri, 27 Jul 2012 13:42:37 +0000 (09:42 -0400)
34 files changed:
Mothur.xcodeproj/project.pbxproj
averagelinkage.cpp
calculator.h
cluster.cpp
cluster.hpp
clusterclassic.h
clustercommand.cpp
clustercommand.h
clustersplitcommand.cpp
clustersplitcommand.h
completelinkage.cpp
getoturepcommand.cpp
getoturepcommand.h
getsharedotucommand.cpp
mgclustercommand.cpp
mothur.h
mothurout.cpp
mothurout.h
phylodiversitycommand.cpp
readblast.cpp
readblast.h
readcolumn.cpp
readmatrix.hpp
readphylip.cpp
seqerrorcommand.cpp
sequence.cpp
shhhercommand.cpp
singlelinkage.cpp
sparsedistancematrix.cpp [new file with mode: 0644]
sparsedistancematrix.h [new file with mode: 0644]
treegroupscommand.cpp
treegroupscommand.h
trimseqscommand.cpp
weightedlinkage.cpp

index 13f12e072b616b5337cd5bece8f3c68c44ca6a3d..ff0e35bbb0bac1d955c7629e75eeb872e2cc1d65 100644 (file)
@@ -51,6 +51,7 @@
                A7C3DC0B14FE457500FE1924 /* cooccurrencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */; };
                A7C3DC0F14FE469500FE1924 /* trialSwap2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */; };
                A7D755DA1535F679009BF21A /* treereader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D755D91535F679009BF21A /* treereader.cpp */; };
+               A7E0243D15B4520A00A5F046 /* sparsedistancematrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E0243C15B4520A00A5F046 /* sparsedistancematrix.cpp */; };
                A7E9B88112D37EC400DA6239 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B64F12D37EC300DA6239 /* ace.cpp */; };
                A7E9B88212D37EC400DA6239 /* aligncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65112D37EC300DA6239 /* aligncommand.cpp */; };
                A7E9B88312D37EC400DA6239 /* alignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65312D37EC300DA6239 /* alignment.cpp */; };
                A7D755D71535F665009BF21A /* treereader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = treereader.h; sourceTree = "<group>"; };
                A7D755D91535F679009BF21A /* treereader.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treereader.cpp; sourceTree = "<group>"; };
                A7DAAFA3133A254E003956EB /* commandparameter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = commandparameter.h; sourceTree = "<group>"; };
+               A7E0243C15B4520A00A5F046 /* sparsedistancematrix.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sparsedistancematrix.cpp; sourceTree = "<group>"; };
+               A7E0243F15B4522000A5F046 /* sparsedistancematrix.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sparsedistancematrix.h; sourceTree = "<group>"; };
                A7E9B64F12D37EC300DA6239 /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = "<group>"; };
                A7E9B65012D37EC300DA6239 /* ace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ace.h; sourceTree = "<group>"; };
                A7E9B65112D37EC300DA6239 /* aligncommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligncommand.cpp; sourceTree = "<group>"; };
                                A7E9B81412D37EC400DA6239 /* sharedsabundvector.h */,
                                A7E9B83912D37EC400DA6239 /* sparsematrix.cpp */,
                                A7E9B83A12D37EC400DA6239 /* sparsematrix.hpp */,
+                               A7E0243F15B4522000A5F046 /* sparsedistancematrix.h */,
+                               A7E0243C15B4520A00A5F046 /* sparsedistancematrix.cpp */,
                                A7E9B85112D37EC400DA6239 /* suffixdb.cpp */,
                                A7E9B85212D37EC400DA6239 /* suffixdb.hpp */,
                                A7E9B85312D37EC400DA6239 /* suffixnodes.cpp */,
                                A70056EB156AB6E500924A2D /* removeotulabelscommand.cpp in Sources */,
                                A73901081588C40900ED2ED6 /* loadlogfilecommand.cpp in Sources */,
                                A74D59A4159A1E2000043046 /* counttable.cpp in Sources */,
+                               A7E0243D15B4520A00A5F046 /* sparsedistancematrix.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index c430c883f66d6006cc9bf01a2a964dd62957688f..e9ff3b312f04041e436686e682380dcbed019018 100644 (file)
@@ -5,14 +5,14 @@
 #include "mothur.h"
 #include "cluster.hpp"
 #include "rabundvector.hpp"
-#include "sparsematrix.hpp"
+#include "sparsedistancematrix.h"
 
 /* This class implements the average UPGMA, average neighbor clustering algorithm */
 
 /***********************************************************************/
 
-AverageLinkage::AverageLinkage(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c, string s) :
-       Cluster(rav, lv, dm, c, s)
+AverageLinkage::AverageLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s) :
+Cluster(rav, lv, dm, c, s)
 {
        saveRow = -1;
        saveCol = -1;
@@ -28,7 +28,7 @@ string AverageLinkage::getTag() {
 
 /***********************************************************************/
 //This function updates the distance based on the average linkage method.
-bool AverageLinkage::updateDistance(MatData& colCell, MatData& rowCell) {
+bool AverageLinkage::updateDistance(PDistCell& colCell, PDistCell& rowCell) {
        try {
                if ((saveRow != smallRow) || (saveCol != smallCol)) {
                        rowBin = rabund->get(smallRow);
@@ -37,9 +37,9 @@ bool AverageLinkage::updateDistance(MatData& colCell, MatData& rowCell) {
                        saveRow = smallRow;
                        saveCol = smallCol;
                }
-               
-               colCell->dist = (colBin * colCell->dist + rowBin * rowCell->dist) / totalBin;
-               
+               //cout << "colcell.dist = " << colCell.dist << '\t' << smallRow << '\t' << smallCol << '\t' << rowCell.dist << endl;
+               colCell.dist = (colBin * colCell.dist + rowBin * rowCell.dist) / totalBin;
+        
                return(true);
        }
        catch(exception& e) {
index 6791d830f01c69492251b6110ebe5ba3d04a50a0..218f8eab74270dbb680aea0d1cf8c4ab823930ff 100644 (file)
@@ -67,86 +67,8 @@ class VecCalc
                //double findMax(vector<double>); //This returns the maximum value in the vector.
                int numNZ(vector<int>); //This returns the number of non-zero values in the vector.
                double numNZ(vector<double>); //This returns the number of non-zero values in the vector.
-               //double numPos(vector<double>); //This returns the number of positive values in the vector.
-               //double findMaxDiff(vector<double>, vector<double>); //This returns the absolute value of the maximum difference between the two vectors.
-               //double findDStat(vector<double>, vector<double>, double); //This returns the D-Statistic of the two vectors with the given total number of species.
-               //vector<int> findQuartiles(vector<double>); //This returns a vector with the first element being the index of the lower quartile of the vector and the second element being the index of the upper quartile of the vector.
-               //vector<double> add(vector<double>, double); //This adds the given number to every element in the given vector and returns the new vector.
-               //vector<double> multiply(vector<double>, double); //This multiplies every element in the given vector by the given number and returns the new vector.
-               //vector<double> power(vector<double>, double); //This raises every element in the given vector to the given number and returns the new vector.
-               //vector<double> addVecs(vector<double>,vector<double>); //The given vectors must be the same size. This adds the ith element of the first given vector to the ith element of the second given vector and returns the new vector.
-               //vector<double> multVecs(vector<double>,vector<double>); //The given vectors must be the same size. This multiplies the ith element of the first given vector to the ith element of the second given vector and returns the new vector.
-               //vector<double> remDup(vector<double>); //This returns a vector that contains 1 of each unique element in the given vector. The order of the elements is not changed.
-               //vector<double> genCVec(vector<double>); //This returns a cumilative vector of the given vector. The ith element of the returned vector is the sum of all the elements in the given vector up to i.
-               //vector<double> genRelVec(vector<double>); //This finds the sum of all the elements in the given vector and then divides the ith element in the given vector by that sum and then puts the result into a new vector, which is returned after all of the elements in the given vector have been used.
-               ///vector<double> genDiffVec(vector<double>, vector<double>);//This subtracts the ith element of the second given vector from the ith element of the first given vector and returns the new vector.
-               //vector<double> genCSVec(vector<double>);//This calculates the number of species that have the same number of individuals as the ith element of the given vector and then returns a cumulative vector.
-               //vector<double> genTotVec(vector<vector<double> >); //This adds up the ith element of all the columns and puts that value into a new vector. It those this for all the rows and then returns the new vector.
-               //vector<double> quicksort(vector<double>); //This sorts the given vector from highest to lowest and returns the sorted vector.
-               //vector<vector<double> > gen2DVec(vector<double>, int, int); //(vector, #rows/columns, 0 if the second parameter was rows, 1 if the second parameter was columns) Transforms a single vector that was formatted like a table into a 2D vector.
-               //vector<string> getSData(char[]);//This takes a file name as a parameter and reads all of the data in the file into a <string> vector.
 };
 
-/**************************************************************************************************/
-
-/*This Class is similar to the GeometricSeries.h class. It calculates
-the broken stick distribution of the table and prints the D-Statistic 
-and the confidence limits for the Kolmogorov-Smirnov 1-Sample test
-with a 95% confidence level.
-
-class BrokenStick
-{
-       public:
-               void doBStick(vector<double>);
-};
-
-//**************************************************************************************************/
-/*This Class calculates the geometric series distribution for the data.
-It prints the D-Statistic and the critical values for the Kolmogorov-Smirnov
-1-sample test at the 95% confidence interval.*/
-
-/*class GeometricSeries
-{
-       public:
-               void doGeomTest(vector<double>);
-};*/
-
-/**************************************************************************************************
-//This Class calculates the jackknifed estimate of the data and
-//prints it and the confidence limits at a chosen confidence level.
-
-class Jackknifing
-{
-       public:
-               void doJK(vector<double>, double);
-};
-/**************************************************************************************************
-/*This Class stores calculates the Kolmogorov-Smirnov 2-Sample test between two samples.
-It prints the D-Statistic and the critical value for the test at 
-the 90% and 95% confidence interval.
-
-class KS2SampleTest
-{
-       public:
-               void doKSTest(vector<double>, vector<double>);
-};
-
-/**************************************************************************************************
-//This Class calculates and prints the Q-Statistic for the data.
-class QStatistic
-{
-       public:
-               void doQStat(vector<double>);
-};
-/**************************************************************************************************
-class SSBPDiversityIndices
-{
-       public:
-               void doSSBP(vector<double>);
-               double getShan(vector<double> vec);//The Shannon Index
-               double getSimp(vector<double> vec);//The Simpson Index
-               double getBP(vector<double> vec);//The Berger-Parker Index
-};
 /**************************************************************************************************/
 //This Class stores the table of the confidence limits of the Student-T distribution.
 class TDTable
@@ -154,26 +76,6 @@ class TDTable
        public:
                double getConfLimit(int,int);
 };
-
-/**************************************************************************************************
-//This Class stores the table of the confidence limits of the One-Sample Kolmogorov-Smirnov Test.
-class KOSTable
-{
-       public:
-               double getConfLimit(int);
-};
-
 /**************************************************************************************************/
-/*This Class calculates the truncated lognormal for the data.
-It then prints the D-Statistic and the critical values for the
-Kolmogorov-Smirnov 1-Sample test.*
-
-class TrunLN
-{
-       public:
-               void doTrunLN(vector<double>, vector<double>);
-};
-/**************************************************************************************************/
-
 #endif
 
index ac9f4482da12110796c54f15c4137681a3d29038..2a3e751c5897e34039e6b9d15f1e0a908ed60556 100644 (file)
 #include "cluster.hpp"
 #include "rabundvector.hpp"
 #include "listvector.hpp"
-#include "sparsematrix.hpp"
 
 /***********************************************************************/
 
-Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c, string f) :
+Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string f) :
 rabund(rav), list(lv), dMatrix(dm), method(f)
 {
        try {
-/*
-       cout << "sizeof(MatData): " << sizeof(MatData) << endl;
-       cout << "sizeof(PCell*): " << sizeof(PCell*) << endl;
-
-       int nCells = dMatrix->getNNodes();
-       time_t start = time(NULL);
-
-       MatVec matvec = MatVec(nCells); 
-       int i = 0;
-       for (MatData currentCell = dMatrix->begin(); currentCell != dMatrix->end(); currentCell++) {
-               matvec[i++] = currentCell;
-       }
-       for (i= matvec.size();i>0;i--) {
-               dMatrix->rmCell(matvec[i-1]);
-       }
-       MatData it = dMatrix->begin(); 
-       while (it != dMatrix->end()) { 
-               it = dMatrix->rmCell(it);
-       }
-       cout << "Time to remove " << nCells << " cells: " << time(NULL) - start << " seconds" << endl;
-    exit(0);
-       MatData it = dMatrix->begin();
-       cout << it->row << "/" << it->column << "/" << it->dist << endl;
-       dMatrix->rmCell(dMatrix->begin());
-       cout << it->row << "/" << it->column << "/" << it->dist << endl;
-       exit(0);
-*/
-
-       // Create a data structure to quickly access the PCell information
-       // for a certain sequence. It consists of a vector of lists, where 
-       // a list contains pointers (iterators) to the all distances related
-       // to a certain sequence. The Vector is accessed via the index of a 
-       // sequence in the distance matrix.
-//ofstream outtemp;
-//string temp = "temp";
-//m->openOutputFile(temp, outtemp);    
-//cout << lv->size() << endl;
-       seqVec = vector<MatVec>(lv->size());
-       for (MatData currentCell = dMatrix->begin(); currentCell != dMatrix->end(); currentCell++) {
-//outtemp << currentCell->row << '\t' << currentCell->column  << '\t' << currentCell->dist << endl;
-               seqVec[currentCell->row].push_back(currentCell);
-               seqVec[currentCell->column].push_back(currentCell);
-       }
-//outtemp.close();
-       mapWanted = false;  //set to true by mgcluster to speed up overlap merge
-       
-       //save so you can modify as it changes in average neighbor
-       cutoff = c;
-       m = MothurOut::getInstance();
-       
+        
+        mapWanted = false;  //set to true by mgcluster to speed up overlap merge
+        
+        //save so you can modify as it changes in average neighbor
+        cutoff = c;
+        m = MothurOut::getInstance();
        }
        catch(exception& e) {
                m->errorOut(e, "Cluster", "Cluster");
                exit(1);
        }
 }
-
-/***********************************************************************/
-
-void Cluster::getRowColCells() {
-       try {
-               PCell* smallCell = dMatrix->getSmallestCell();  //find the smallest cell - this routine should probably not be in the SpMat class
-       
-               smallRow = smallCell->row;              // get its row
-               smallCol = smallCell->column;   // get its column
-               smallDist = smallCell->dist;    // get the smallest distance
-       //cout << "small row = " << smallRow << "small col = " << smallCol << "small dist = " << smallDist << endl;
-        
-               rowCells = seqVec[smallRow];    // all distances related to the row index
-               colCells = seqVec[smallCol];    // all distances related to the column index
-               nRowCells = rowCells.size();
-               nColCells = colCells.size();
-//cout << "num rows = " << nRowCells << "num col = " << nColCells << endl;
-               
-               //for (int i = 0; i < nColCells; i++) { cout << colCells[i]->row << '\t' << colCells[i]->column << endl;  }
-               //for (int i = 0; i < nRowCells; i++) { cout << rowCells[i]->row << '\t' << rowCells[i]->column << endl;  }
-       }
-       catch(exception& e) {
-               m->errorOut(e, "Cluster", "getRowColCells");
-               exit(1);
-       }
-
-}
-/***********************************************************************/
-// Remove the specified cell from the seqVec and from the sparse
-// matrix
-void Cluster::removeCell(const MatData& cell, int vrow, int vcol, bool rmMatrix){
-       try {
-       
-               ull drow = cell->row;
-                       ull dcol = cell->column;
-                       if (((vrow >=0) && (drow != smallRow)) ||
-                               ((vcol >=0) && (dcol != smallCol))) {
-                               ull dtemp = drow;
-                               drow = dcol;
-                               dcol = dtemp;
-                       }
-
-                       ull crow;
-                       ull ccol;
-                       int nCells;
-                       if (vrow < 0) {
-                               nCells = seqVec[drow].size();
-                               for (vrow=0; vrow<nCells;vrow++) {
-                                       crow = seqVec[drow][vrow]->row;
-                                       ccol = seqVec[drow][vrow]->column;
-                                       if (((crow == drow) && (ccol == dcol)) ||
-                                               ((ccol == drow) && (crow == dcol))) {
-                                               break;
-                                       }
-                               }
-                       }
-
-                       seqVec[drow].erase(seqVec[drow].begin()+vrow);
-                       if (vcol < 0) {
-                               nCells = seqVec[dcol].size();
-                               for (vcol=0; vcol<nCells;vcol++) {
-                                       crow = seqVec[dcol][vcol]->row;
-                                       ccol = seqVec[dcol][vcol]->column;
-                                       if (((crow == drow) && (ccol == dcol)) ||
-                                               ((ccol == drow) && (crow == dcol))) {
-                                               break;
-                                       }
-                               }
-                       }
-               
-                       seqVec[dcol].erase(seqVec[dcol].begin()+vcol);
-               
-                       if (rmMatrix) {
-                       //cout << " removing = " << cell->row << '\t' << cell->column  << '\t' << cell->dist << endl;
-                               dMatrix->rmCell(cell);
-               //      cout << "done" << endl;
-                       }
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "Cluster", "removeCell");
-               exit(1);
-       }
-}
 /***********************************************************************/
-
 void Cluster::clusterBins(){
        try {
-       //      cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol);
-
-               rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol));     
+               rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol));     
                rabund->set(smallRow, 0);       
                rabund->setLabel(toString(smallDist));
-
-       //      cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl;
        }
        catch(exception& e) {
                m->errorOut(e, "Cluster", "clusterBins");
                exit(1);
        }
-
-
 }
-
 /***********************************************************************/
 
 void Cluster::clusterNames(){
        try {
-       //      cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(smallRow) << '\t' << list->get(smallCol);
                if (mapWanted) {  updateMap();  }
                
                list->set(smallCol, list->get(smallRow)+','+list->get(smallCol));
                list->set(smallRow, "");        
                list->setLabel(toString(smallDist));
-       
-       //      cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl;
     }
        catch(exception& e) {
                m->errorOut(e, "Cluster", "clusterNames");
                exit(1);
        }
-
 }
-
 /***********************************************************************/
-//This function clusters based on the method of the derived class
-//At the moment only average and complete linkage are covered, because
-//single linkage uses a different approach.
 void Cluster::update(double& cutOFF){
        try {
-               getRowColCells();       
-
+        smallCol = dMatrix->getSmallestCell(smallRow);
+        nColCells = dMatrix->seqVec[smallCol].size();
+        nRowCells = dMatrix->seqVec[smallRow].size();
+        
                vector<int> foundCol(nColCells, 0);
-
+        //cout << "small cell: " << smallRow << '\t' << smallCol << endl;  
                int search;
                bool changed;
-
-               // The vector has to be traversed in reverse order to preserve the index
-               // for faster removal in removeCell()
+        
                for (int i=nRowCells-1;i>=0;i--) {
+            if (m->control_pressed) { break; }
+             
                        //if you are not the smallCell
-                       if (!((rowCells[i]->row == smallRow) && (rowCells[i]->column == smallCol))) {
-                               if (rowCells[i]->row == smallRow) {
-                                       search = rowCells[i]->column;
-                               } else {
-                                       search = rowCells[i]->row;
-                               }
-                               
+                       if (dMatrix->seqVec[smallRow][i].index != smallCol) { 
+                search = dMatrix->seqVec[smallRow][i].index;
+                
                                bool merged = false;
                                for (int j=0;j<nColCells;j++) {
-                                       if (!((colCells[j]->row == smallRow) && (colCells[j]->column == smallCol))) { //if you are not hte smallest distance
-                                               if (colCells[j]->row == search || colCells[j]->column == search) {
+                    
+                                       if (dMatrix->seqVec[smallCol][j].index != smallRow) {  //if you are not the smallest distance
+                                               if (dMatrix->seqVec[smallCol][j].index == search) {
                                                        foundCol[j] = 1;
                                                        merged = true;
-                                                       changed = updateDistance(colCells[j], rowCells[i]);
-                                                       // If the cell's distance changed and it had the same distance as 
-                                                       // the smallest distance, invalidate the mins vector in SparseMatrix
-                                                       if (changed) {
-                                                               if (colCells[j]->vectorMap != NULL) {
-                                                                       *(colCells[j]->vectorMap) = NULL;
-                                                                       colCells[j]->vectorMap = NULL;
-                                                               }
-                                                       }
+                                                       changed = updateDistance(dMatrix->seqVec[smallCol][j], dMatrix->seqVec[smallRow][i]);
+                            dMatrix->updateCellCompliment(smallCol, j);
                                                        break;
-                                               }
-                                       }               
+                                               }else if (dMatrix->seqVec[smallCol][j].index < search) { j+=nColCells; } //we don't have a distance for this cell 
+                                       }       
                                }
                                //if not merged it you need it for warning 
                                if ((!merged) && (method == "average" || method == "weighted")) {  
-                                       //m->mothurOut("Warning: trying to merge cell " + toString(rowCells[i]->row+1) + " " + toString(rowCells[i]->column+1) + " distance " + toString(rowCells[i]->dist) + " with value above cutoff. Results may vary from using cutoff at cluster command instead of read.dist."); m->mothurOutEndLine(); 
-                                       if (cutOFF > rowCells[i]->dist) {  
-                                               cutOFF = rowCells[i]->dist;  
-                                               //m->mothurOut("changing cutoff to " + toString(cutOFF));  m->mothurOutEndLine(); 
+                                       if (cutOFF > dMatrix->seqVec[smallRow][i].dist) {  
+                                               cutOFF = dMatrix->seqVec[smallRow][i].dist;  
                                        }
-
+                    
                                }
-                               removeCell(rowCells[i], i , -1);  
-                               
+                               dMatrix->rmCell(smallRow, i);
                        }
                }
                clusterBins();
                clusterNames();
-
+        
                // Special handling for singlelinkage case, not sure whether this
                // could be avoided
                for (int i=nColCells-1;i>=0;i--) {
-                       if (foundCol[i] == 0) {
+                       if (foundCol[i] == 0) { 
                                if (method == "average" || method == "weighted") {
-                                       if (!((colCells[i]->row == smallRow) && (colCells[i]->column == smallCol))) {
-                                               //m->mothurOut("Warning: merging cell " + toString(colCells[i]->row+1) + " " + toString(colCells[i]->column+1) + " distance " + toString(colCells[i]->dist) + " value above cutoff. Results may vary from using cutoff at cluster command instead of read.dist."); m->mothurOutEndLine();
-                                               if (cutOFF > colCells[i]->dist) {  
-                                                       cutOFF = colCells[i]->dist;  
-                                                       //m->mothurOut("changing cutoff to " + toString(cutOFF));  m->mothurOutEndLine(); 
+                                       if (dMatrix->seqVec[smallCol][i].index != smallRow) { //if you are not hte smallest distance 
+                                               if (cutOFF > dMatrix->seqVec[smallCol][i].dist) {  
+                                                       cutOFF = dMatrix->seqVec[smallCol][i].dist;  
                                                }
                                        }
                                }
-                               removeCell(colCells[i], -1, i);
+                dMatrix->rmCell(smallCol, i);
                        }
                }
+        
        }
        catch(exception& e) {
                m->errorOut(e, "Cluster", "update");
@@ -284,21 +127,23 @@ void Cluster::setMapWanted(bool f)  {
        try {
                mapWanted = f;
                
-               //initialize map
-               for (int i = 0; i < list->getNumBins(); i++) {
-                       
-                       //parse bin 
-                       string names = list->get(i);
-                       while (names.find_first_of(',') != -1) { 
-                               //get name from bin
-                               string name = names.substr(0,names.find_first_of(','));
-                               //save name and bin number
-                               seq2Bin[name] = i;
-                               names = names.substr(names.find_first_of(',')+1, names.length());
-                       }
-                       
-                       //get last name
-                       seq2Bin[names] = i;
+        //initialize map
+               for (int k = 0; k < list->getNumBins(); k++) {
+            
+            string names = list->get(k);
+            
+            //parse bin
+            string individual = "";
+            int binNameslength = names.size();
+            for(int j=0;j<binNameslength;j++){
+                if(names[j] == ','){
+                    seq2Bin[individual] = k;
+                    individual = "";                           
+                }
+                else{  individual += names[j];  }
+            }
+            //get last name
+            seq2Bin[individual] = k;
                }
                
        }
@@ -309,20 +154,22 @@ void Cluster::setMapWanted(bool f)  {
 }
 /***********************************************************************/
 void Cluster::updateMap() {
-try {
+    try {
                //update location of seqs in smallRow since they move to smallCol now
                string names = list->get(smallRow);
-               while (names.find_first_of(',') != -1) { 
-                       //get name from bin
-                       string name = names.substr(0,names.find_first_of(','));
-                       //save name and bin number
-                       seq2Bin[name] = smallCol;
-                       names = names.substr(names.find_first_of(',')+1, names.length());
-               }
-                       
-               //get last name
-               seq2Bin[names] = smallCol;
                
+        string individual = "";
+        int binNameslength = names.size();
+        for(int j=0;j<binNameslength;j++){
+            if(names[j] == ','){
+                seq2Bin[individual] = smallCol;
+                individual = "";                               
+            }
+            else{  individual += names[j];  }
+        }
+        //get last name
+        seq2Bin[individual] = smallCol;                
+       
        }
        catch(exception& e) {
                m->errorOut(e, "Cluster", "updateMap");
index d7c2737a05ed1a0198be9713aa4345f288ec3976..ff5d41d38c71cfea8dd9d434f7a4e466e0cd160c 100644 (file)
@@ -2,49 +2,42 @@
 #define CLUSTER_H
 
 
+
 #include "mothur.h"
-#include "sparsematrix.hpp"
+#include "sparsedistancematrix.h"
 #include "mothurout.h"
 
 class RAbundVector;
 class ListVector;
 
-typedef vector<MatData> MatVec;
-
 class Cluster {
        
 public:
-       Cluster(RAbundVector*, ListVector*, SparseMatrix*, float, string);
+       Cluster(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string);
     virtual void update(double&);                              
        virtual string getTag() = 0;
        virtual void setMapWanted(bool m);  
        virtual map<string, int> getSeqtoBin()  {  return seq2Bin;      }
-
-protected:     
-       void getRowColCells();
-    void removeCell(const MatData& cell, int vrow, int vcol, bool rmMatrix=true);
-
-       virtual bool updateDistance(MatData& colCell, MatData& rowCell) = 0;
-
+    
+protected:         
+       virtual bool updateDistance(PDistCell& colCell, PDistCell& rowCell) = 0;
+    
        virtual void clusterBins();
        virtual void clusterNames();
        virtual void updateMap();
        
        RAbundVector* rabund;
        ListVector* list;
-       SparseMatrix* dMatrix;  
+       SparseDistanceMatrix* dMatrix;  
        
-       int smallRow;
-       int smallCol;
+       ull smallRow;
+       ull smallCol;
        float smallDist;
        bool mapWanted;
        float cutoff;
        map<string, int> seq2Bin;
        string method;
        
-       vector<MatVec> seqVec;          // contains vectors of cells related to a certain sequence
-       MatVec rowCells;
-       MatVec colCells;
        ull nRowCells;
        ull nColCells;
        MothurOut* m;
@@ -54,33 +47,33 @@ protected:
 
 class CompleteLinkage : public Cluster {
 public:
-       CompleteLinkage(RAbundVector*, ListVector*, SparseMatrix*, float, string);
-       bool updateDistance(MatData& colCell, MatData& rowCell);
+       CompleteLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string);
+       bool updateDistance(PDistCell& colCell, PDistCell& rowCell);
        string getTag();
        
 private:
-               
+    
 };
 
 /***********************************************************************/
 
 class SingleLinkage : public Cluster {
 public:
-       SingleLinkage(RAbundVector*, ListVector*, SparseMatrix*, float, string);
+       SingleLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string);
     void update(double&);
-       bool updateDistance(MatData& colCell, MatData& rowCell);
+       bool updateDistance(PDistCell& colCell, PDistCell& rowCell);
        string getTag();
        
 private:
-               
+    
 };
 
 /***********************************************************************/
 
 class AverageLinkage : public Cluster {
 public:
-       AverageLinkage(RAbundVector*, ListVector*, SparseMatrix*, float, string);
-       bool updateDistance(MatData& colCell, MatData& rowCell);
+       AverageLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string);
+       bool updateDistance(PDistCell& colCell, PDistCell& rowCell);
        string getTag();
        
 private:
@@ -89,15 +82,15 @@ private:
        int rowBin;
        int colBin;
        int totalBin;
-
+    
 };
 
 /***********************************************************************/
 
 class WeightedLinkage : public Cluster {
 public:
-       WeightedLinkage(RAbundVector*, ListVector*, SparseMatrix*, float, string);
-       bool updateDistance(MatData& colCell, MatData& rowCell);
+       WeightedLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string);
+       bool updateDistance(PDistCell& colCell, PDistCell& rowCell);
        string getTag();
        
 private:
@@ -107,4 +100,6 @@ private:
 
 /***********************************************************************/
 
-#endif
+
+
+#endif
\ No newline at end of file
index 932c806ec42ac52a4a90670239d0c4a6a27b1efb..a650bbf8de79147c3bd48bbcf1f63fdf22c6f574 100644 (file)
@@ -41,13 +41,13 @@ private:
        struct colDist {
                int col;
                int row;
-               double dist;
+               float dist;
                colDist(int r, int c, double d) : row(r), col(c), dist(d) {}
        };
        
        RAbundVector* rabund;
        ListVector* list;
-       vector< vector<double> > dMatrix;       
+       vector< vector<float> > dMatrix;        
        //vector<colDist> rowSmallDists;
        
        int smallRow;
index 8019def31cad9acc54c1308171bf796e949890a1..56ce8bb19e19d10b7181512b31d422d3d6b2a32f 100644 (file)
@@ -288,7 +288,7 @@ int ClusterCommand::execute(){
                
                read->read(nameMap);
                list = read->getListVector();
-               matrix = read->getMatrix();
+               matrix = read->getDMatrix();
                rabund = new RAbundVector(list->getRAbundVector());
                delete read;
                
@@ -333,7 +333,7 @@ int ClusterCommand::execute(){
                loops = 0;
                double saveCutoff = cutoff;
                
-               while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
+               while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){  
                
                        if (m->control_pressed) { //clean up
                                delete list; delete matrix; delete rabund; delete cluster;
@@ -353,8 +353,8 @@ int ClusterCommand::execute(){
                        loops++;
 
                        cluster->update(cutoff);
-       
-                       float dist = matrix->getSmallDist();
+            
+            float dist = matrix->getSmallDist();
                        float rndDist;
                        if (hard) {
                                rndDist = m->ceilDist(dist, precision); 
index 7349961e252104c93cf26942d79b44f11ba7c0f5..e3dd214446d31d187148de2be7dbf2ffbfebe96e 100644 (file)
@@ -14,7 +14,7 @@
 #include "sabundvector.hpp"
 #include "listvector.hpp"
 #include "cluster.hpp"
-#include "sparsematrix.hpp"
+#include "sparsedistancematrix.h"
 
 /* The cluster() command:
        The cluster command outputs a .list , .rabund and .sabund files.  
@@ -44,7 +44,7 @@ public:
        
 private:
        Cluster* cluster;
-       SparseMatrix* matrix;
+       SparseDistanceMatrix* matrix;
        ListVector* list;
        RAbundVector* rabund;
        RAbundVector oldRAbund;
index fc0211ee5a5432ef610c6f22696bff74adf0c72b..b097f382024d5be9c56bd346723e398b10ef3725 100644 (file)
@@ -1197,7 +1197,7 @@ string ClusterSplitCommand::clusterFile(string thisDistFile, string thisNamefile
         string listFileName = "";
         
         Cluster* cluster = NULL;
-        SparseMatrix* matrix = NULL;
+        SparseDistanceMatrix* matrix = NULL;
         ListVector* list = NULL;
         ListVector oldList;
         RAbundVector* rabund = NULL;
@@ -1227,7 +1227,7 @@ string ClusterSplitCommand::clusterFile(string thisDistFile, string thisNamefile
         
         list = read->getListVector();
         oldList = *list;
-        matrix = read->getMatrix();
+        matrix = read->getDMatrix();
         
         delete read;  read = NULL;
         delete nameMap; nameMap = NULL;
index 28de9488b68516e2e194db0dd35920c7078e91f9..d46bb8ed9be115e229cd1ed0ea39a993aa6f61f0 100644 (file)
@@ -15,7 +15,7 @@
 #include "sabundvector.hpp"
 #include "listvector.hpp"
 #include "cluster.hpp"
-#include "sparsematrix.hpp"
+#include "sparsedistancematrix.h"
 #include "readcluster.h"
 #include "splitmatrix.h"
 #include "readphylip.h"
index 86e90547f494ef0883705f640521cb82cf6dbb8f..06ed2db6495a0897c74c2b278aac642a15ea1c78 100644 (file)
@@ -3,7 +3,7 @@
 
 /***********************************************************************/
 
-CompleteLinkage::CompleteLinkage(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c, string s) :
+CompleteLinkage::CompleteLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s) :
        Cluster(rav, lv, dm, c, s)
 {}
 
@@ -16,11 +16,11 @@ string CompleteLinkage::getTag() {
 
 /***********************************************************************/
 //This function updates the distance based on the furthest neighbor method.
-bool CompleteLinkage::updateDistance(MatData& colCell, MatData& rowCell) {
+bool CompleteLinkage::updateDistance(PDistCell& colCell, PDistCell& rowCell) {
        try {
                bool changed = false;
-               if (colCell->dist < rowCell->dist) {
-                       colCell->dist = rowCell->dist;
+               if (colCell.dist < rowCell.dist) {
+                       colCell.dist = rowCell.dist;
                        changed = true;
                }       
                return(changed);
index 4ebde29f912f6e9a02d88dbee301488b52d4a082..4967f245fb11c2ff37195a83baf39d29667fc582 100644 (file)
@@ -359,16 +359,19 @@ int GetOTURepCommand::execute(){
 
                        list = readMatrix->getListVector();
 
-                       SparseMatrix* matrix = readMatrix->getMatrix();
+                       SparseDistanceMatrix* matrix = readMatrix->getDMatrix();
                        
                        // Create a data structure to quickly access the distance information.
                        // It consists of a vector of distance maps, where each map contains
                        // all distances of a certain sequence. Vector and maps are accessed
                        // via the index of a sequence in the distance matrix
                        seqVec = vector<SeqMap>(list->size()); 
-                       for (MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++) {
-                               if (m->control_pressed) { delete readMatrix; return 0; }
-                               seqVec[currentCell->row][currentCell->column] = currentCell->dist;
+            for (int i = 0; i < matrix->seqVec.size(); i++) {
+                for (int j = 0; j < matrix->seqVec[i].size(); j++) {
+                    if (m->control_pressed) { delete readMatrix; return 0; }
+                    //already added everyone else in row
+                    if (i < matrix->seqVec[i][j].index) {  seqVec[i][matrix->seqVec[i][j].index] = matrix->seqVec[i][j].dist;  }
+                }
                        }
                        //add dummy map for unweighted calc
                        SeqMap dummy;
index 5af0340b4b920a4fab3decc79bd2b3b4237f85ef..d19a396410f2e2c9850f9b20a93d65c03000bac2 100644 (file)
@@ -19,7 +19,6 @@
 #include "readmatrix.hpp"
 #include "formatmatrix.h"
 
-typedef list<PCell>::iterator MatData;
 typedef map<int, float> SeqMap;
 
 struct repStruct {
index 9beca6aa96f75a58891b5326b5cbd94209701656..1b69a25d4af7e638dbf48a64baec2390f37dcfec 100644 (file)
@@ -417,10 +417,11 @@ int GetSharedOTUCommand::process(ListVector* shared) {
                        
                        vector<string> namesOfSeqsInThisBin;
                        
-                       string names = shared->get(i);  
-                       while ((names.find_first_of(',') != -1)) { 
-                               string name = names.substr(0,names.find_first_of(','));
-                               names = names.substr(names.find_first_of(',')+1, names.length());
+                       string names = shared->get(i); 
+            vector<string> binNames;
+            m->splitAtComma(names, binNames);
+                       for(int j = 0; j < binNames.size(); j++) {
+                               string name = binNames[j];
                                
                                //find group
                                string seqGroup = groupMap->getGroup(name);
@@ -436,20 +437,6 @@ int GetSharedOTUCommand::process(ListVector* shared) {
                                else {  atLeastOne[seqGroup]++;  }
                        }
                        
-                       //get last name
-                       string seqGroup = groupMap->getGroup(names);
-                       if (output != "accnos") {
-                               namesOfSeqsInThisBin.push_back((names + "|" + seqGroup + "|" + toString(i+1)));
-                       }else {  namesOfSeqsInThisBin.push_back(names); }
-                       
-                       if (seqGroup == "not found") { m->mothurOut(names + " is not in your groupfile. Please correct."); m->mothurOutEndLine(); exit(1);  }
-                       
-                       //is this seq in one of hte groups we care about
-                       it = groupFinder.find(seqGroup);
-                       if (it == groupFinder.end()) {  uniqueOTU = false;  } //you have a sequence from a group you don't want
-                       else {  atLeastOne[seqGroup]++;  }
-                       
-                       
                        //make sure you have at least one seq from each group you want
                        bool sharedByAll = true;
                        map<string, int>::iterator it2;
index a845e0bfe080782b2b12d2f1a634925781578d70..dd98b3764884f5c50009d8e9e7422a71681d94c1 100644 (file)
@@ -287,7 +287,7 @@ int MGClusterCommand::execute(){
                
                if (!hclusterWanted) {
                        //get distmatrix and overlap
-                       SparseMatrix* distMatrix = read->getDistMatrix();
+                       SparseDistanceMatrix* distMatrix = read->getDistMatrix();
                        overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
                        delete read;
                
index 2c143e8667786c5740f0aa6324ea4062eab66b76..25b803fa393e27d0c2811e62efdeb042ce390dec 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -88,6 +88,7 @@ using namespace std;
 
 
 typedef unsigned long ull;
+typedef unsigned short intDist;
 
 struct IntNode {
        int lvalue;
@@ -119,7 +120,13 @@ struct diffPair {
                reverseProb = rp;
        }
 };
-
+/***********************************************************************/
+struct PDistCell{
+       ull index;
+       float dist;
+       PDistCell() :  index(0), dist(0) {};
+       PDistCell(ull c, float d) :  index(c), dist(d) {}
+};
 /************************************************************/
 struct clusterNode {
        int numSeq;
@@ -158,10 +165,14 @@ struct spearmanRank {
        
        spearmanRank(string n, float s) : name(n), score(s) {}
 };
+//***********************************************************************
+inline bool compareIndexes(PDistCell left, PDistCell right){
+       return (left.index > right.index);      
+}
 //********************************************************************************************************************
 //sorts highest to lowest
 inline bool compareSpearman(spearmanRank left, spearmanRank right){
-       return (left.score > right.score);      
+       return (left.score < right.score);      
 } 
 //********************************************************************************************************************
 //sorts highest to lowest
index 14840c16e162bc599c242e289e3c5899fb396de4..dfcf25b4447d28362dae3f27a4e9cfd27e362d8e 100644 (file)
@@ -2017,6 +2017,28 @@ bool MothurOut::mothurConvert(string item, int& num){
                exit(1);
        }
 }
+/***********************************************************************/
+bool MothurOut::mothurConvert(string item, intDist& num){
+       try {
+               bool error = false;
+               
+               if (isNumeric1(item)) {
+                       convert(item, num);
+               }else {
+                       num = 0;
+                       error = true;
+                       mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
+                       commandInputsConvertError = true;
+               }
+               
+               return error;
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "mothurConvert");
+               exit(1);
+       }
+}
+
 /***********************************************************************/
 bool MothurOut::isNumeric1(string stringToCheck){
        try {
index ac47d79f7d92b113f58dbe421fdbaad892b2b175..77c5a804070eaa2dd3229375df999a5b9521dc92 100644 (file)
@@ -111,6 +111,7 @@ class MothurOut {
                int readNames(string, vector<seqPriorityNode>&, map<string, string>&);
                int mothurRemove(string);
                bool mothurConvert(string, int&); //use for converting user inputs. Sets commandInputsConvertError to true if error occurs. Engines check this.
+        bool mothurConvert(string, intDist&); //use for converting user inputs. Sets commandInputsConvertError to true if error occurs. Engines check this.
                bool mothurConvert(string, float&); //use for converting user inputs. Sets commandInputsConvertError to true if error occurs. Engines check this.
                bool mothurConvert(string, double&); //use for converting user inputs. Sets commandInputsConvertError to true if error occurs. Engines check this.
        
index 9d68cb5debbbc65472577c4861391bb8ffbde3bc..ddd2b316d507b8477d84d529ae9fc4d88c69e518 100644 (file)
@@ -346,7 +346,7 @@ int PhyloDiversityCommand::execute(){
        
                if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } return 0; }
 
-        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
+        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run phylo.diversity."); m->mothurOutEndLine();
 
         
                m->mothurOutEndLine();
@@ -460,7 +460,7 @@ int PhyloDiversityCommand::driver(Tree* t, map< string, vector<float> >& div, ma
         
                for (int l = 0; l < numIters; l++) {
                                random_shuffle(randomLeaf.begin(), randomLeaf.end());
-            cout << l << endl;
+         
                                //initialize counts
                                map<string, int> counts;
                 vector< map<string, bool> > countedBranch;
index 2d4947745be0a52fafba4c33f38fb6504ede2288..5f2441515378031891c78d2fbb809c816e885cec 100644 (file)
@@ -54,7 +54,8 @@ int ReadBlast::read(NameAssignment* nameMap) {
                
                //create objects needed for read
                if (!hclusterWanted) {
-                       matrix = new SparseMatrix();
+                       matrix = new SparseDistanceMatrix();
+            matrix->resize(nseqs);
                }else{
                        overlapFile = m->getRootName(blastfile) + "overlap.dist";
                        distFile = m->getRootName(blastfile) + "hclusterDists.dist";
@@ -185,8 +186,13 @@ int ReadBlast::read(NameAssignment* nameMap) {
                                                        //is this distance below cutoff
                                                        if (distance < cutoff) {
                                                                if (!hclusterWanted) {
-                                                                       PCell value(itA->second, it->first, distance);
-                                                                       matrix->addCell(value);
+                                    if (itA->second < it->first) {
+                                        PDistCell value(it->first, distance);
+                                        matrix->addCell(itA->second, value);
+                                    }else {
+                                        PDistCell value(itA->second, distance);
+                                        matrix->addCell(it->first, value);
+                                    }
                                                                }else{
                                                                        outDist << itA->first << '\t' << nameMap->get(it->first) << '\t' << distance << endl;
                                                                }
@@ -252,8 +258,13 @@ int ReadBlast::read(NameAssignment* nameMap) {
                                //is this distance below cutoff
                                if (distance < cutoff) {
                                        if (!hclusterWanted) {
-                                               PCell value(itA->second, it->first, distance);
-                                               matrix->addCell(value);
+                                               if (itA->second < it->first) {
+                            PDistCell value(it->first, distance);
+                            matrix->addCell(itA->second, value);
+                        }else {
+                            PDistCell value(itA->second, distance);
+                            matrix->addCell(it->first, value);
+                        }
                                        }else{
                                                outDist << itA->first << '\t' << nameMap->get(it->first) << '\t' << distance << endl;
                                        }
index fd32380c20c1a7145a8c65d07700b54d2d5bc63b..ef5ff9afa182f5fb7f7226c506481a0252964962 100644 (file)
@@ -10,7 +10,7 @@
  */
 
 #include "mothur.h"
-#include "sparsematrix.hpp"
+#include "sparsedistancematrix.h"
 #include "nameassignment.hpp"
 
 /****************************************************************************************/
@@ -26,7 +26,7 @@ public:
        ~ReadBlast() {}
        
        int read(NameAssignment*);
-       SparseMatrix* getDistMatrix()           {       return matrix;          }
+       SparseDistanceMatrix* getDistMatrix()           {       return matrix;          }
        vector<seqDist> getOverlapMatrix()      {       return overlap;         }
        string getOverlapFile()                         {       return overlapFile;     }
        string getDistFile()                            {       return distFile;        }
@@ -38,7 +38,7 @@ private:
        bool minWanted;  //if true choose min bsr, if false choose max bsr
        bool hclusterWanted;
        
-       SparseMatrix* matrix;
+       SparseDistanceMatrix* matrix;
        vector<seqDist> overlap;
        MothurOut* m;
        
index 53a8c4263dddc4687acc5a64cf7c5b1d27aac647..665dcabd5db7fd17513d912bab974ae4087d449f 100644 (file)
@@ -34,7 +34,7 @@ int ReadColumnMatrix::read(NameAssignment* nameMap){
                string firstName, secondName;
                float distance;
                int nseqs = nameMap->size();
-
+        DMatrix->resize(nseqs);
                list = new ListVector(nameMap->getListVector());
        
                Progress* reading = new Progress("Reading matrix:     ", nseqs * nseqs);
@@ -63,33 +63,34 @@ int ReadColumnMatrix::read(NameAssignment* nameMap){
                        
                        if(distance < cutoff && itA != itB){
                                if(itA->second > itB->second){
-                                       PCell value(itA->second, itB->second, distance);
-                       
+                    PDistCell value(itA->second, distance);
+                    
+                    
                                        if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
                                                refRow = itA->second;
                                                refCol = itB->second;
-                                               D->addCell(value);
+                                               DMatrix->addCell(itB->second, value);
                                        }
                                        else if(refRow == itA->second && refCol == itB->second){
                                                lt = 0;
                                        }
                                        else{
-                                               D->addCell(value);
+                                               DMatrix->addCell(itB->second, value);
                                        }
                                }
                                else if(itA->second < itB->second){
-                                       PCell value(itB->second, itA->second, distance);
+                                       PDistCell value(itB->second, distance);
                        
                                        if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
                                                refRow = itA->second;
                                                refCol = itB->second;
-                                               D->addCell(value);
+                                               DMatrix->addCell(itA->second, value);
                                        }
                                        else if(refRow == itB->second && refCol == itA->second){
                                                lt = 0;
                                        }
                                        else{
-                                               D->addCell(value);
+                                               DMatrix->addCell(itA->second, value);
                                        }
                                }
                                reading->update(itA->second * nseqs);
@@ -100,7 +101,7 @@ int ReadColumnMatrix::read(NameAssignment* nameMap){
                if(lt == 0){  // oops, it was square
        
                        fileHandle.close();  //let's start over
-                       D->clear();  //let's start over
+                       DMatrix->clear();  //let's start over
                   
                        m->openInputFile(distFile, fileHandle);  //let's start over
 
@@ -119,8 +120,8 @@ int ReadColumnMatrix::read(NameAssignment* nameMap){
                                else if (sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.
                                
                                if(distance < cutoff && itA->second > itB->second){
-                                       PCell value(itA->second, itB->second, distance);
-                                       D->addCell(value);
+                    PDistCell value(itA->second, distance);
+                                       DMatrix->addCell(itB->second, value);
                                        reading->update(itA->second * nseqs);
                                }
                
@@ -145,10 +146,6 @@ int ReadColumnMatrix::read(NameAssignment* nameMap){
 }
 
 /***********************************************************************/
-
-ReadColumnMatrix::~ReadColumnMatrix(){
-       //delete D;
-       //delete list;
-}
-
+ReadColumnMatrix::~ReadColumnMatrix(){}
+/***********************************************************************/
 
index 49ae2946fda42c49c8475b040ac8f7e3f5617499..5ef8afa9d7846a18957ac30cb483e0be98f6450f 100644 (file)
 
 #include "mothur.h"
 #include "listvector.hpp"
-#include "sparsematrix.hpp"
 #include "nameassignment.hpp"
+#include "sparsedistancematrix.h"
 
 class SparseMatrix;
 
 class ReadMatrix {
 
 public:
-       ReadMatrix(){   D = new SparseMatrix();  m = MothurOut::getInstance();  }
+       ReadMatrix(){ DMatrix = new SparseDistanceMatrix(); m = MothurOut::getInstance();  }
        virtual ~ReadMatrix() {}
        virtual int read(NameAssignment*){ return 1; }
        
        void setCutoff(float c)                 {       cutoff = c;             }
-       SparseMatrix* getMatrix()               {       return D;               }
+    SparseDistanceMatrix* getDMatrix()         {       return DMatrix;         }
        ListVector* getListVector()             {       return list;    }
-//     OrderVector* getOrderVector()   {       return order;   }
 
        int successOpen;
        
 protected:
-       SparseMatrix* D;
+    SparseDistanceMatrix* DMatrix;
        ListVector* list;
        float cutoff;
        MothurOut* m;
index 1c529b26f15fdfa172c8a0f7ebcf20187e87b03b..d256f92a816b03c101a01c12fdf19f4cace6678a 100644 (file)
@@ -33,7 +33,7 @@ int ReadPhylipMatrix::read(NameAssignment* nameMap){
         try {
         
                         float distance;
-                        int square, nseqs;
+                        int square, nseqs; 
                         string name;
                         vector<string> matrixNames;
                                                
@@ -72,7 +72,8 @@ int ReadPhylipMatrix::read(NameAssignment* nameMap){
                         }
         
                         Progress* reading;
-      
+                        DMatrix->resize(nseqs);
+        
                         if(square == 0){
 
                                 reading = new Progress("Reading matrix:     ", nseqs * (nseqs - 1) / 2);
@@ -95,14 +96,13 @@ int ReadPhylipMatrix::read(NameAssignment* nameMap){
                                                                                                                if (m->control_pressed) { delete reading; fileHandle.close(); return 0;  }
                                                                                                                
                                                         fileHandle >> distance;
-                                                                                       
                                                 
                                                         if (distance == -1) { distance = 1000000; }
                                                                                                                else if (sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.
                                                 
                                                         if(distance < cutoff){
-                                                                PCell value(i, j, distance);
-                                                                D->addCell(value);
+                                                            PDistCell value(i, distance);
+                                                            DMatrix->addCell(j, value);
                                                         }
                                                         index++;
                                                         reading->update(index);
@@ -121,8 +121,8 @@ int ReadPhylipMatrix::read(NameAssignment* nameMap){
                                                                                                                else if (sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.
                                                         
                                                         if(distance < cutoff){
-                                                                PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance);
-                                                                D->addCell(value);
+                                                            PDistCell value(nameMap->get(matrixNames[i]), distance);
+                                                            DMatrix->addCell(nameMap->get(matrixNames[j]), value);
                                                         }
                                                         index++;
                                                         reading->update(index);
@@ -153,8 +153,8 @@ int ReadPhylipMatrix::read(NameAssignment* nameMap){
                                                                                                                else if (sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.
                                                         
                                                         if(distance < cutoff && j < i){
-                                                                PCell value(i, j, distance);
-                                                                D->addCell(value);
+                                                            PDistCell value(i, distance);
+                                                            DMatrix->addCell(j, value);
                                                         }
                                                         index++;
                                                         reading->update(index);
@@ -173,8 +173,8 @@ int ReadPhylipMatrix::read(NameAssignment* nameMap){
                                                                                                                else if (sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.                                                        
                                                         
                                                                                                                if(distance < cutoff && j < i){
-                                                                PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance);
-                                                                D->addCell(value);
+                                                            PDistCell value(nameMap->get(matrixNames[i]), distance);
+                                                            DMatrix->addCell(nameMap->get(matrixNames[j]), value);
                                                         }
                                                         index++;
                                                         reading->update(index);
@@ -187,20 +187,11 @@ int ReadPhylipMatrix::read(NameAssignment* nameMap){
                                                
                         reading->finish();
                         delete reading;
-
+            
                         list->setLabel("0");
                         fileHandle.close();
 
-                     /*   if(nameMap != NULL){
-                                for(int i=0;i<matrixNames.size();i++){
-                                        nameMap->erase(matrixNames[i]);
-                                }
-                                if(nameMap->size() > 0){
-                                        //should probably tell them what is missing if we missed something
-                                        m->mothurOut("missed something\t" + toString(nameMap->size())); m->mothurOutEndLine();
-                                }
-                        } */
-                                               
+                                                               
                                                return 1;
 
                 }
@@ -211,8 +202,6 @@ int ReadPhylipMatrix::read(NameAssignment* nameMap){
        }
 
 /***********************************************************************/
+ReadPhylipMatrix::~ReadPhylipMatrix(){}
+/***********************************************************************/
 
-ReadPhylipMatrix::~ReadPhylipMatrix(){
-       // delete D;
-       // delete list;
-}
index 9a7b814873f53239e734aee7733af51cf49dab6d..5ec6cf2d1cc4778399d289ea2280c882f936b944 100644 (file)
@@ -821,12 +821,16 @@ void SeqErrorCommand::getReferences(){
                                //
                                //                      int endPos = rdb->referenceSeqs[i].getEndPos();
                                //                      if(endPos < minEndPos)          {       minEndPos = endPos;             }                               
+                               if (rdb->referenceSeqs[i].getNumBases() == 0) {
+                    m->mothurOut("[WARNING]: " + rdb->referenceSeqs[i].getName() + " is blank, ignoring.");m->mothurOutEndLine(); 
+                }else {
+                    referenceSeqs.push_back(rdb->referenceSeqs[i]);
+                }
                                
-                               referenceSeqs.push_back(rdb->referenceSeqs[i]);
                        }
                        referenceFileName = rdb->getSavedReference();
                        
-                       m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences.");m->mothurOutEndLine();  
+                       m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(referenceSeqs.size()) + " sequences.");m->mothurOutEndLine();  
                
                }else {
                        int start = time(NULL);
@@ -844,9 +848,12 @@ void SeqErrorCommand::getReferences(){
        //
        //                      int endPos = currentSeq.getEndPos();
        //                      if(endPos < minEndPos)          {       minEndPos = endPos;             }
-                               referenceSeqs.push_back(currentSeq);
-                               
-                               if (rdb->save) { rdb->referenceSeqs.push_back(currentSeq); }
+                if (currentSeq.getNumBases() == 0) {
+                    m->mothurOut("[WARNING]: " + currentSeq.getName() + " is blank, ignoring.");m->mothurOutEndLine(); 
+                }else {
+                    referenceSeqs.push_back(currentSeq);
+                    if (rdb->save) { rdb->referenceSeqs.push_back(currentSeq); }
+                }
                                        
                                m->gobble(referenceFile);
                        }
@@ -860,7 +867,7 @@ void SeqErrorCommand::getReferences(){
                for(int i=0;i<numRefs;i++){
                        referenceSeqs[i].padToPos(maxStartPos);
                        referenceSeqs[i].padFromPos(minEndPos);
-               }
+        }
                
                if(numAmbigSeqs != 0){
                        m->mothurOut("Warning: " + toString(numAmbigSeqs) + " reference sequences have ambiguous bases, these bases will be ignored\n");
@@ -964,7 +971,6 @@ int SeqErrorCommand::getErrors(Sequence query, Sequence reference, Compare& erro
                errors.errorRate = (double)(errors.total-errors.matches) / (double)errors.total;
                errors.queryName = query.getName();
                errors.refName = reference.getName();
-               
                //return errors;
         return 0;
        }
index a1e787bab8cd24ebb352593055e7d2fc10b406de..a359607a3ea11fa118efed129407ee608d4903e5 100644 (file)
@@ -682,7 +682,7 @@ int Sequence::getEndPos(){
 //********************************************************************************************************************
 
 void Sequence::padFromPos(int end){
-       cout << end << '\t' << endPos << endl;
+       //cout << end << '\t' << endPos << endl;
        for(int j = end; j < endPos; j++) {
                aligned[j] = '.';
        }
index 115e930969323ba4655e0b5ba5c2a8e1166a1397..c34f25de509c78b9e1b97b7c303142864e3c7e01 100644 (file)
@@ -802,7 +802,7 @@ string ShhherCommand::createNamesFile(){
                        duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
                }
                
-               string nameFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName) + getOutputFileNameTag("name");
+               string nameFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("name");
                
                ofstream nameFile;
                m->openOutputFile(nameFileName, nameFile);
@@ -2309,7 +2309,7 @@ int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFil
                 if (m->control_pressed) { break; }
                 
                 vector<int> otuCounts(numOTUs, 0);
-                for(int i=0;i<numSeqs;i++)     {       otuCounts[otuData[i]]++;        }
+                for(int j=0;j<numSeqs;j++)     {       otuCounts[otuData[j]]++;        }
                 
                 calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);        
                 
@@ -2632,7 +2632,7 @@ int ShhherCommand::cluster(string filename, string distFileName, string namesFil
                read->read(clusterNameMap);
         
                ListVector* list = read->getListVector();
-               SparseMatrix* matrix = read->getMatrix();
+               SparseDistanceMatrix* matrix = read->getDMatrix();
                
                delete read; 
                delete clusterNameMap; 
index 1a1b2aee36bed8af6b7d9782e54adcdedc04d466..3af1ea0c346030eb6f8eda3527ffc7a882c1b950 100644 (file)
@@ -5,7 +5,7 @@
 
 /***********************************************************************/
 
-SingleLinkage::SingleLinkage(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c, string s) :
+SingleLinkage::SingleLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s) :
 Cluster(rav, lv, dm, c, s)
 {}
 
@@ -18,9 +18,11 @@ string SingleLinkage::getTag() {
 
 /***********************************************************************/
 //This function clusters based on the single linkage method.
-void  SingleLinkage::update(double& cutOFF){
+void SingleLinkage::update(double& cutOFF){
        try {
-               getRowColCells();       
+               smallCol = dMatrix->getSmallestCell(smallRow);
+        nColCells = dMatrix->seqVec[smallCol].size();
+        nRowCells = dMatrix->seqVec[smallRow].size();  
        
                vector<bool> deleted(nRowCells, false);
                int rowInd;
@@ -30,55 +32,45 @@ void  SingleLinkage::update(double& cutOFF){
                // The vector has to be traversed in reverse order to preserve the index
                // for faster removal in removeCell()
                for (int i=nRowCells-1;i>=0;i--) {
-                       if ((rowCells[i]->row == smallRow) && (rowCells[i]->column == smallCol)) {
-                               rowInd = i;   // The index of the smallest distance cell in rowCells
-                       } else {
-                               if (rowCells[i]->row == smallRow) {
-                                       search = rowCells[i]->column;
-                               } else {
-                                       search = rowCells[i]->row;
-                               }
-               
-                               for (int j=0;j<nColCells;j++) {
-                                       if (!((colCells[j]->row == smallRow) && (colCells[j]->column == smallCol))) {
-                                               if (colCells[j]->row == search || colCells[j]->column == search) {
-                                                       changed = updateDistance(colCells[j], rowCells[i]);
-                                                       // If the cell's distance changed and it had the same distance as 
-                                                       // the smallest distance, invalidate the mins vector in SparseMatrix
-                                                       if (changed) {
-                                                               if (colCells[j]->vectorMap != NULL) {
-                                                                       *(colCells[j]->vectorMap) = NULL;
-                                                                       colCells[j]->vectorMap = NULL;
-                                                               }
-                                                       }
-                                                       removeCell(rowCells[i], i , -1);
-                                                       deleted[i] = true;
-                                                       break;
-                                               }
-                                       }
-                               }
-                               if (!deleted[i]) {
-                                       // Assign the cell to the new cluster 
-                                       // remove the old cell from seqVec and add the cell
-                                       // with the new row and column assignment again
-                                       removeCell(rowCells[i], i , -1, false);
-                                       if (search < smallCol){
-                                               rowCells[i]->row = smallCol;
-                                               rowCells[i]->column = search;
-                                       } else {
-                                               rowCells[i]->row = search;
-                                               rowCells[i]->column = smallCol;
-                                       }
-                                       seqVec[rowCells[i]->row].push_back(rowCells[i]);
-                                       seqVec[rowCells[i]->column].push_back(rowCells[i]);
-                               }
-                       }       
+                if (dMatrix->seqVec[smallRow][i].index == smallCol) {
+                    rowInd = i;   // The index of the smallest distance cell in rowCells
+                } else {
+                    search = dMatrix->seqVec[smallRow][i].index;
+                    
+                    for (int j=0;j<nColCells;j++) {
+                        if (dMatrix->seqVec[smallCol][j].index != smallRow) { //if you are not the small cell
+                            if (dMatrix->seqVec[smallCol][j].index == search) {
+                                changed = updateDistance(dMatrix->seqVec[smallCol][j], dMatrix->seqVec[smallRow][i]);
+                                dMatrix->updateCellCompliment(smallCol, j);
+                                dMatrix->rmCell(smallRow, i);
+                                deleted[i] = true;
+                                break;
+                            }
+                        }
+                    }
+                    if (!deleted[i]) {
+                        // Assign the cell to the new cluster 
+                        // remove the old cell from seqVec and add the cell
+                        // with the new row and column assignment again
+                        float distance =  dMatrix->seqVec[smallRow][i].dist;
+                        dMatrix->rmCell(smallRow, i);
+                        if (search < smallCol){
+                            PDistCell value(smallCol, distance);
+                            dMatrix->addCell(search, value);
+                        } else {
+                            PDistCell value(search, distance);
+                            dMatrix->addCell(smallCol, value);
+                        }
+                        sort(dMatrix->seqVec[smallCol].begin(), dMatrix->seqVec[smallCol].end(), compareIndexes);
+                        sort(dMatrix->seqVec[search].begin(), dMatrix->seqVec[search].end(), compareIndexes); 
+                    }
+                }
                }
                clusterBins();
                clusterNames();
                // remove also the cell with the smallest distance
 
-               removeCell(rowCells[rowInd], -1 , -1);
+               dMatrix->rmCell(smallRow, rowInd);
        }
        catch(exception& e) {
                m->errorOut(e, "SingleLinkage", "update");
@@ -89,11 +81,11 @@ void  SingleLinkage::update(double& cutOFF){
 
 /***********************************************************************/
 //This function updates the distance based on the nearest neighbor method.
-bool SingleLinkage::updateDistance(MatData& colCell, MatData& rowCell) {
+bool SingleLinkage::updateDistance(PDistCell& colCell, PDistCell& rowCell) {
        try {
                bool changed = false;
-               if (colCell->dist > rowCell->dist) {
-                       colCell->dist = rowCell->dist;
+               if (colCell.dist > rowCell.dist) {
+                       colCell.dist = rowCell.dist;
                }
                return(changed);
        }
diff --git a/sparsedistancematrix.cpp b/sparsedistancematrix.cpp
new file mode 100644 (file)
index 0000000..8035e13
--- /dev/null
@@ -0,0 +1,149 @@
+//
+//  sparsedistancematrix.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 7/16/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "sparsedistancematrix.h"
+
+
+/***********************************************************************/
+
+SparseDistanceMatrix::SparseDistanceMatrix() : numNodes(0), smallDist(1e6){  m = MothurOut::getInstance(); sorted=false; aboveCutoff = 1e6; }
+
+/***********************************************************************/
+
+int SparseDistanceMatrix::getNNodes(){
+       return numNodes; 
+}
+/***********************************************************************/
+
+void SparseDistanceMatrix::clear(){
+    for (int i = 0; i < seqVec.size(); i++) {  seqVec[i].clear();  }
+    seqVec.clear();
+}
+
+/***********************************************************************/
+
+float SparseDistanceMatrix::getSmallDist(){
+       return smallDist;
+}
+/***********************************************************************/
+
+int SparseDistanceMatrix::updateCellCompliment(ull row, ull col){
+    try {
+        
+        ull vrow = seqVec[row][col].index;
+        ull vcol = 0;
+        
+        //find the columns entry for this cell as well
+        for (int i = 0; i < seqVec[vrow].size(); i++) {  if (seqVec[vrow][i].index == row) { vcol = i;  break; }  }
+
+        seqVec[vrow][vcol].dist = seqVec[row][col].dist;
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SparseDistanceMatrix", "updateCellCompliment");
+               exit(1);
+       }
+}
+/***********************************************************************/
+
+int SparseDistanceMatrix::rmCell(ull row, ull col){
+       try {
+        numNodes-=2;
+        ull vrow = seqVec[row][col].index;
+        ull vcol = 0;
+        
+        //find the columns entry for this cell as well
+        for (int i = 0; i < seqVec[vrow].size(); i++) {  if (seqVec[vrow][i].index == row) { vcol = i;  break; }  }
+        
+        seqVec[vrow].erase(seqVec[vrow].begin()+vcol);
+        seqVec[row].erase(seqVec[row].begin()+col);
+               return(0);
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SparseDistanceMatrix", "rmCell");
+               exit(1);
+       }
+}
+/***********************************************************************/
+void SparseDistanceMatrix::addCell(ull row, PDistCell cell){
+       try {
+               numNodes+=2;
+               if(cell.dist < smallDist){ smallDist = cell.dist; }
+        
+        seqVec[row].push_back(cell);
+        PDistCell temp(row, cell.dist);
+        seqVec[cell.index].push_back(temp);
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SparseDistanceMatrix", "addCell");
+               exit(1);
+       }
+}
+/***********************************************************************/
+
+ull SparseDistanceMatrix::getSmallestCell(ull& row){
+       try {
+        if (!sorted) { sortSeqVec(); sorted = true; }
+        
+        vector<PDistCellMin> mins;
+        smallDist = 1e6;
+       
+        for (int i = 0; i < seqVec.size(); i++) {
+            for (int j = 0; j < seqVec[i].size(); j++) {
+                //already checked everyone else in row
+                if (i < seqVec[i][j].index) {  
+                           
+                    float dist = seqVec[i][j].dist;
+                  
+                    if(dist < smallDist){  //found a new smallest distance
+                        mins.clear();
+                        smallDist = dist;
+                        PDistCellMin temp(i, seqVec[i][j].index);
+                        mins.push_back(temp);  
+                    }
+                    else if(dist == smallDist){  //if a subsequent distance is the same as mins distance add the new iterator to the mins vector
+                        PDistCellMin temp(i, seqVec[i][j].index);
+                        mins.push_back(temp); 
+                    }
+                }else { j+=seqVec[i].size(); } //stop looking 
+                       }
+               }
+        
+               random_shuffle(mins.begin(), mins.end());  //randomize the order of the iterators in the mins vector
+        
+        row = mins[0].row;
+        ull col = mins[0].col;
+
+               return col;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SparseMatrix", "getSmallestCell");
+               exit(1);
+       }
+}
+/***********************************************************************/
+
+int SparseDistanceMatrix::sortSeqVec(){
+       try {
+        
+        //saves time in getSmallestCell, by making it so you dont search the repeats
+        for (int i = 0; i < seqVec.size(); i++) {  sort(seqVec[i].begin(), seqVec[i].end(), compareIndexes); }
+    
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SparseMatrix", "getSmallestCell");
+               exit(1);
+       }
+}
+/***********************************************************************/
+
diff --git a/sparsedistancematrix.h b/sparsedistancematrix.h
new file mode 100644 (file)
index 0000000..f18fdd7
--- /dev/null
@@ -0,0 +1,66 @@
+#ifndef Mothur_sparsedistancematrix_h
+#define Mothur_sparsedistancematrix_h
+
+//
+//  sparsedistancematrix.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 7/16/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "mothur.h"
+#include "mothurout.h"
+
+
+class ListVector;
+
+
+/* For each distance in a sparse matrix we have a row, column and distance.  
+ The PDistCell consists of the column and distance.
+ We know the row by the distances row in the seqVec matrix.  
+ SeqVec is square and each row is sorted so the column values are ascending to save time in the search for the smallest distance. */
+
+/***********************************************************************/
+struct PDistCellMin{
+       ull row;
+    ull col;
+       //PDistCell* cell;
+       PDistCellMin(ull r, ull c) :  col(c), row(r) {}
+};
+/***********************************************************************/
+
+
+
+class SparseDistanceMatrix {
+       
+public:
+       SparseDistanceMatrix();
+       ~SparseDistanceMatrix(){ clear(); }
+       int getNNodes();
+       ull getSmallestCell(ull& index);                //Return the cell with the smallest distance
+       float getSmallDist();
+       
+       int rmCell(ull, ull);
+    int updateCellCompliment(ull, ull);
+    void resize(ull n) { seqVec.resize(n);  }
+    void clear();
+       void addCell(ull, PDistCell);
+    vector<vector<PDistCell> > seqVec;
+    
+private:
+       PDistCell smallCell;                            //The cell with the smallest distance
+       int numNodes;
+    
+    bool sorted;
+    int sortSeqVec();
+       float smallDist, aboveCutoff;
+    
+       MothurOut* m;
+};
+
+/***********************************************************************/
+
+
+
+#endif
index 888f50202e3443855a99d82d71b126cf715ee380..6633e5160559297c23a542c373ed1b1cc9a3ccdc 100644 (file)
@@ -286,7 +286,7 @@ TreeGroupCommand::TreeGroupCommand(string option)  {
 TreeGroupCommand::~TreeGroupCommand(){
        if (abort == false) {
                if (format == "sharedfile") {  delete input; }
-               else { delete readMatrix;  delete matrix; delete list; }
+               else { delete list; }
                delete tmap;  
        }
        
@@ -418,7 +418,8 @@ int TreeGroupCommand::execute(){
                }else{
                        //read in dist file
                        filename = inputfile;
-               
+            
+            ReadMatrix* readMatrix;
                        if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }        
                        else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
                                
@@ -434,7 +435,7 @@ int TreeGroupCommand::execute(){
        
                        readMatrix->read(nameMap);
                        list = readMatrix->getListVector();
-                       matrix = readMatrix->getMatrix();
+                       SparseDistanceMatrix* dMatrix = readMatrix->getDMatrix();
 
                        //make treemap
                        tmap = new TreeMap();
@@ -457,7 +458,9 @@ int TreeGroupCommand::execute(){
                        
                        if (m->control_pressed) { return 0; }
                        
-                       vector< vector<double> > matrix = makeSimsDist();
+                       vector< vector<double> > matrix = makeSimsDist(dMatrix);
+            delete readMatrix;
+            delete dMatrix;
                        
                        if (m->control_pressed) { return 0; }
 
@@ -554,7 +557,7 @@ void TreeGroupCommand::printSims(ostream& out, vector< vector<double> >& simMatr
        }
 }
 /***********************************************************/
-vector< vector<double> > TreeGroupCommand::makeSimsDist() {
+vector< vector<double> > TreeGroupCommand::makeSimsDist(SparseDistanceMatrix* matrix) {
        try {
                numGroups = list->size();
                
@@ -569,13 +572,17 @@ vector< vector<double> > TreeGroupCommand::makeSimsDist() {
                
                //go through sparse matrix and fill sims
                //go through each cell in the sparsematrix
-               for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
-                       //similairity = -(distance-1)
-                       simMatrix[currentCell->row][currentCell->column] = -(currentCell->dist -1.0);   
-                       simMatrix[currentCell->column][currentCell->row] = -(currentCell->dist -1.0);   
-                       
-                       if (m->control_pressed) { return simMatrix; }
+        for (int i = 0; i < matrix->seqVec.size(); i++) {
+            for (int j = 0; j < matrix->seqVec[i].size(); j++) {
+                
+                //already checked everyone else in row
+                if (i < matrix->seqVec[i][j].index) {   
+                    simMatrix[i][matrix->seqVec[i][j].index] = -(matrix->seqVec[i][j].dist -1.0);      
+                    simMatrix[matrix->seqVec[i][j].index][i] = -(matrix->seqVec[i][j].dist -1.0);      
                        
+                    if (m->control_pressed) { return simMatrix; }
+                }
+            }
                }
 
                return simMatrix;
index a0e0d420163cabdfe46518873cc3a42d98bc14a6..b0ae730d98aa8af6da4fecabe3cf0f7aa03df2d4 100644 (file)
@@ -101,12 +101,10 @@ private:
        Tree* createTree(vector< vector<double> >&);
        void printSims(ostream&, vector< vector<double> >&);
        int makeSimsShared();
-       vector< vector<double> > makeSimsDist();
+       vector< vector<double> > makeSimsDist(SparseDistanceMatrix*);
     int writeTree(string, Tree*);
     int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
        
-       ReadMatrix* readMatrix;
-       SparseMatrix* matrix;
        NameAssignment* nameMap;
        ListVector* list;
        TreeMap* tmap;
index 3b9d195f34fa3e916ae9b3d92328195c1337579e..00367695944bd579c13052210b9f0d8491d0e509 100644 (file)
@@ -607,11 +607,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
 
                        Sequence currSeq(inFASTA); m->gobble(inFASTA);
                        //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
-            if (m->debug) { m->mothurOut("[DEBUG]: " + toString(count) + " fasta = " + currSeq.getName() + '\n'); }
+            
                        QualityScores currQual;
                        if(qFileName != ""){
                                currQual = QualityScores(qFile);  m->gobble(qFile);
-                 if (m->debug) { m->mothurOut("[DEBUG]: qual = " + currQual.getName() + '\n'); }
+                if ((m->debug)&&(count>15800)) { m->mothurOut("[DEBUG]: " + toString(count) + " fasta = " + currSeq.getName() + '\n'); m->mothurOut("[DEBUG]: " + toString(getpid()) + '\n'); }
                        }
                        
                        string origSeq = currSeq.getUnaligned();
@@ -883,6 +883,8 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                                                 tempNameFileNames,
                                                                 lines[process],
                                                                 qLines[process]);
+                
+                if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
                                
                                //pass groupCounts to parent
                                if(createGroup){
index 0851bd7ef94392ac2b2e269c16082981a7a0445a..19c41ce555b87ed296e134bf6d403752a07b5159 100644 (file)
@@ -11,7 +11,7 @@
 
 /***********************************************************************/
 
-WeightedLinkage::WeightedLinkage(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c, string s) :
+WeightedLinkage::WeightedLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s) :
        Cluster(rav, lv, dm, c, s)
 {
        saveRow = -1;
@@ -28,7 +28,7 @@ string WeightedLinkage::getTag() {
 
 /***********************************************************************/
 //This function updates the distance based on the average linkage method.
-bool WeightedLinkage::updateDistance(MatData& colCell, MatData& rowCell) {
+bool WeightedLinkage::updateDistance(PDistCell& colCell, PDistCell& rowCell) {
        try {
                if ((saveRow != smallRow) || (saveCol != smallCol)) {
 //                     rowBin = rabund->get(smallRow);
@@ -38,7 +38,7 @@ bool WeightedLinkage::updateDistance(MatData& colCell, MatData& rowCell) {
                        saveCol = smallCol;
                }
                
-               colCell->dist = (colCell->dist + rowCell->dist) / 2.0;
+               colCell.dist = (colCell.dist + rowCell.dist) / 2.0;
                
                return(true);
        }