git.donarmstrong.com Git - mothur.git/commitdiff
added adjust parameter to mgcluster. fixed bug in classify.otu with countable. fixed...
author Sarah Westcott <mothur.westcott@gmail.com>
Thu, 18 Jul 2013 20:16:16 +0000 (13:16 -0700)
committer Sarah Westcott <mothur.westcott@gmail.com>
Thu, 18 Jul 2013 20:16:16 +0000 (13:16 -0700)
20 files changed:
Mothur.xcodeproj/project.pbxproj
aligncommand.cpp
averagelinkage.cpp
classifyotucommand.cpp
cluster.cpp
cluster.hpp
clustercommand.cpp
clustercommand.h
clustersplitcommand.cpp
completelinkage.cpp
mgclustercommand.cpp
mgclustercommand.h
otuhierarchycommand.cpp
screenseqscommand.h
seqsummarycommand.cpp
shhhercommand.cpp
singlelinkage.cpp
sparsedistancematrix.cpp
sparsedistancematrix.h
weightedlinkage.cpp

index b09bab447e3307bc279189d5e5b5dae30a677e7a..0b945f5e899372cc6e5cbe16ce8ef4abaa96d738 100644 (file)
                        outputFiles = (
                                "$(TARGET_BUILD_DIR)/$(INPUT_FILE_BASE).o",
                        );
-                       script = "/usr/local/bin/gfortran -g -m64 -c ${PROJECT_DIR}/${INPUT_FILE_NAME} -o ${TARGET_BUILD_DIR}/${INPUT_FILE_BASE}.o";
+                       script = "/usr/local/gfortran/bin/gfortran -g -m64 -c ${PROJECT_DIR}/${INPUT_FILE_NAME} -o ${TARGET_BUILD_DIR}/${INPUT_FILE_BASE}.o";
                };
 /* End PBXBuildRule section */
 
index f9c0436c62874864279e533a3258d72bf2f6cf2d..f757a7920bb337883a2a7773f35ed533d2186c80 100644 (file)
@@ -558,6 +558,7 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam
                        if (m->control_pressed) {  break; }
                        
                        Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
+            cout  << candidateSeq->getAligned() << endl;
                        report.setCandidate(candidateSeq);
 
                        int origNumBases = candidateSeq->getNumBases();
index e9ff3b312f04041e436686e682380dcbed019018..8627253777c1b09a1b4b16d1ff2ad4f90468248c 100644 (file)
@@ -11,8 +11,8 @@
 
 /***********************************************************************/
 
-AverageLinkage::AverageLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s) :
-Cluster(rav, lv, dm, c, s)
+AverageLinkage::AverageLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s, float a) :
+Cluster(rav, lv, dm, c, s, a)
 {
        saveRow = -1;
        saveCol = -1;
index 170c234ac245e6c32e3174ad80145fbcc359f2f2..76d7504aa04b0170629b0f022a882be94ceacfc0 100644 (file)
@@ -616,9 +616,10 @@ int ClassifyOtuCommand::process(ListVector* processList) {
                        //add this bins taxonomy to summary
                        if (basis == "sequence") {
                                for(int j = 0; j < names.size(); j++) {  
-                    int numReps = 1;
-                    if (countfile != "") {  numReps = ct->getNumSeqs(names[j]); }
-                    for(int k = 0; k < numReps; k++) {  taxaSum->addSeqToTree(names[j], noConfidenceConTax);  }
+                    //int numReps = 1;
+                    //if (countfile != "") {  numReps = ct->getNumSeqs(names[j]); }
+                    //for(int k = 0; k < numReps; k++) {  taxaSum->addSeqToTree(names[j], noConfidenceConTax);  }
+                    taxaSum->addSeqToTree(names[j], noConfidenceConTax);
                 }
                        }else { //otu
                 map<string, bool> containsGroup; 
index 0a70fbfee99957de6f08315562f9f0d9ac14fee0..6b69e4d5312fcce5db6c084fd42499b64724fb5e 100644 (file)
@@ -13,8 +13,8 @@
 
 /***********************************************************************/
 
-Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string f) :
-rabund(rav), list(lv), dMatrix(dm), method(f)
+Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string f, float cs) :
+rabund(rav), list(lv), dMatrix(dm), method(f), adjust(cs)
 {
        try {
         
@@ -85,7 +85,21 @@ void Cluster::update(double& cutOFF){
                                                        changed = updateDistance(dMatrix->seqVec[smallCol][j], dMatrix->seqVec[smallRow][i]);
                             dMatrix->updateCellCompliment(smallCol, j);
                                                        break;
-                                               }else if (dMatrix->seqVec[smallCol][j].index < search) { j+=nColCells; } //we don't have a distance for this cell 
+                                               }else if (dMatrix->seqVec[smallCol][j].index < search) { //we don't have a distance for this cell
+                            if (adjust != -1.0) { //adjust
+                                merged = true;
+                                PDistCell value(search, adjust); //create a distance for the missing value
+                                int location = dMatrix->addCellSorted(smallCol, value);
+                                changed = updateDistance(dMatrix->seqVec[smallCol][location], dMatrix->seqVec[smallRow][i]);
+                                dMatrix->updateCellCompliment(smallCol, location);
+                                nColCells++;
+                                foundCol.push_back(0); //add a new found column
+                                //adjust value
+                                for (int k = foundCol.size()-1; k > location; k--) { foundCol[k] = foundCol[k-1]; }
+                                foundCol[location] = 1;
+                            }
+                            j+=nColCells;
+                        } 
                                        }       
                                }
                                //if not merged it you need it for warning 
@@ -105,14 +119,20 @@ void Cluster::update(double& cutOFF){
                // Special handling for singlelinkage case, not sure whether this
                // could be avoided
                for (int i=nColCells-1;i>=0;i--) {
-                       if (foundCol[i] == 0) { 
-                               if (method == "average" || method == "weighted") {
-                                       if (dMatrix->seqVec[smallCol][i].index != smallRow) { //if you are not hte smallest distance 
-                                               if (cutOFF > dMatrix->seqVec[smallCol][i].dist) {  
-                                                       cutOFF = dMatrix->seqVec[smallCol][i].dist;  
-                                               }
-                                       }
-                               }
+                       if (foundCol[i] == 0) {
+                if (adjust != -1.0) { //adjust
+                    PDistCell value(smallCol, adjust); //create a distance for the missing value
+                    changed = updateDistance(dMatrix->seqVec[smallCol][i], value);
+                    dMatrix->updateCellCompliment(smallCol, i);
+                }else {
+                    if (method == "average" || method == "weighted") {
+                        if (dMatrix->seqVec[smallCol][i].index != smallRow) { //if you are not hte smallest distance 
+                            if (cutOFF > dMatrix->seqVec[smallCol][i].dist) {  
+                                cutOFF = dMatrix->seqVec[smallCol][i].dist;  
+                            }
+                        }
+                    }
+                }
                 dMatrix->rmCell(smallCol, i);
                        }
                }
index 26a01b9389778fea2e2b1067a1738c75027ad7f2..23a3d97f26db6087adcf1dee3c015e581e06321c 100644 (file)
@@ -13,7 +13,7 @@ class ListVector;
 class Cluster {
        
 public:
-       Cluster(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string);
+       Cluster(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string, float);
     virtual ~Cluster() {}
     virtual void update(double&);                              
        virtual string getTag() = 0;
@@ -33,7 +33,7 @@ protected:
        
        ull smallRow;
        ull smallCol;
-       float smallDist;
+       float smallDist, adjust;
        bool mapWanted;
        float cutoff;
        map<string, int> seq2Bin;
@@ -48,7 +48,7 @@ protected:
 
 class CompleteLinkage : public Cluster {
 public:
-       CompleteLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string);
+       CompleteLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string, float);
        bool updateDistance(PDistCell& colCell, PDistCell& rowCell);
        string getTag();
        
@@ -60,7 +60,7 @@ private:
 
 class SingleLinkage : public Cluster {
 public:
-       SingleLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string);
+       SingleLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string, float);
     void update(double&);
        bool updateDistance(PDistCell& colCell, PDistCell& rowCell);
        string getTag();
@@ -73,7 +73,7 @@ private:
 
 class AverageLinkage : public Cluster {
 public:
-       AverageLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string);
+       AverageLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string, float);
        bool updateDistance(PDistCell& colCell, PDistCell& rowCell);
        string getTag();
        
@@ -90,7 +90,7 @@ private:
 
 class WeightedLinkage : public Cluster {
 public:
-       WeightedLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string);
+       WeightedLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string, float);
        bool updateDistance(PDistCell& colCell, PDistCell& rowCell);
        string getTag();
        
index 2ac2eaa6bd2466df296e407e7e67f9c8a095edc3..94129654e1d2b11275e82d3a5da0382b1448efa9 100644 (file)
@@ -29,6 +29,7 @@ vector<string> ClusterCommand::setParameters(){
                CommandParameter psim("sim", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psim);
                CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+        //CommandParameter padjust("adjust", "String", "", "F", "", "", "","",false,false); parameters.push_back(padjust);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
@@ -45,7 +46,8 @@ string ClusterCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The cluster command parameter options are phylip, column, name, count, method, cuttoff, hard, precision, sim, showabund and timing. Phylip or column and name are required, unless you have a valid current file.\n";
-               helpString += "The cluster command should be in the following format: \n";
+               //helpString += "The adjust parameter is used to handle missing distances.  If you set a cutoff, adjust=f by default.  If not, adjust=t by default. Adjust=f, means ignore missing distances and adjust cutoff as needed with the average neighbor method.  Adjust=t, will treat missing distances as 1.0. You can also set the value the missing distances should be set to, adjust=0.5 would give missing distances a value of 0.5.\n";
+        helpString += "The cluster command should be in the following format: \n";
                helpString += "cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n";
                helpString += "The acceptable cluster methods are furthest, nearest, average and weighted.  If no method is provided then average is assumed.\n";       
                return helpString;
@@ -229,10 +231,18 @@ ClusterCommand::ClusterCommand(string option)  {
                        temp = validParameter.validFile(parameters, "sim", false);                              if (temp == "not found") { temp = "F"; }
                        sim = m->isTrue(temp); 
                        
+            //bool cutoffSet = false;
                        temp = validParameter.validFile(parameters, "cutoff", false);
                        if (temp == "not found") { temp = "10"; }
+            //else { cutoffSet = true; }
                        m->mothurConvert(temp, cutoff); 
-                       cutoff += (5 / (precision * 10.0));  
+                       cutoff += (5 / (precision * 10.0));
+            
+            //temp = validParameter.validFile(parameters, "adjust", false);                            if (temp == "not found") { temp = "F"; }
+            //if (m->isNumeric1(temp))    { m->mothurConvert(temp, adjust);   }
+            //else if (m->isTrue(temp))   { adjust = 1.0;                     }
+            //else                        { adjust = -1.0;                    }
+            adjust=-1.0;
                        
                        method = validParameter.validFile(parameters, "method", false);
                        if (method == "not found") { method = "average"; }
@@ -325,10 +335,10 @@ int ClusterCommand::execute(){
                }
                
                //create cluster
-               if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
-               else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
-               else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
-               else if(method == "weighted"){  cluster = new WeightedLinkage(rabund, list, matrix, cutoff, method);    }
+               if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method, adjust); }
+               else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method, adjust); }
+               else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method, adjust);     }
+               else if(method == "weighted"){  cluster = new WeightedLinkage(rabund, list, matrix, cutoff, method, adjust);    }
                tag = cluster->getTag();
                
                if (outputDir == "") { outputDir += m->hasPath(distfile); }
index 96b7c08861f216862de0c1e600d07fed463eb56a..5786da220046713d4eb18a26f2aba4529011093c 100644 (file)
@@ -56,6 +56,7 @@ private:
 
        string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, format, distfile, countfile;
        double cutoff;
+    float adjust;
        string showabund, timing;
        int precision, length;
        ofstream sabundFile, rabundFile, listFile;
index 270ea6208d8ead0683d8cf46fb8d7a417469c243..b02bd20063e9f70bb422fa541403013d6a1976ab 100644 (file)
@@ -1330,9 +1330,10 @@ string ClusterSplitCommand::clusterFile(string thisDistFile, string thisNamefile
         m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
                
         //create cluster
-        if (method == "furthest")      {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
-        else if(method == "nearest"){  cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
-        else if(method == "average"){  cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
+        float adjust = -1.0;
+        if (method == "furthest")      {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method, adjust); }
+        else if(method == "nearest"){  cluster = new SingleLinkage(rabund, list, matrix, cutoff, method, adjust); }
+        else if(method == "average"){  cluster = new AverageLinkage(rabund, list, matrix, cutoff, method, adjust);     }
         tag = cluster->getTag();
                
         if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
index 06ed2db6495a0897c74c2b278aac642a15ea1c78..0a3c7b3e42f68347d0315299d5ed050b89ebb796 100644 (file)
@@ -3,8 +3,8 @@
 
 /***********************************************************************/
 
-CompleteLinkage::CompleteLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s) :
-       Cluster(rav, lv, dm, c, s)
+CompleteLinkage::CompleteLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s, float a) :
+       Cluster(rav, lv, dm, c, s, a)
 {}
 
 /***********************************************************************/
index e3287f7f9bb19a4079c28e92fe57cf99e06216d5..97f0afdb366949d39e5af37a96c0211d9df0ca17 100644 (file)
@@ -23,6 +23,7 @@ vector<string> MGClusterCommand::setParameters(){
                CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
                CommandParameter pmin("min", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmin);
                CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmerge);
+        CommandParameter padjust("adjust", "String", "", "F", "", "", "","",false,false); parameters.push_back(padjust);
                CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(phcluster);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
@@ -40,7 +41,7 @@ vector<string> MGClusterCommand::setParameters(){
 string MGClusterCommand::getHelpString(){      
        try {
                string helpString = "";
-               helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard,  method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
+               helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard,  method, merge, min, length, penalty, adjust and hcluster. The blast parameter is required.\n";
                helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
                helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
                helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
@@ -48,6 +49,7 @@ string MGClusterCommand::getHelpString(){
                helpString += "The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then average is assumed.\n";       
                helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
                helpString += "The length parameter is used to specify the minimum overlap required.  The default is 5.\n";
+        helpString += "The adjust parameter is used to handle missing distances.  If you set a cutoff, adjust=f by default.  If not, adjust=t by default. Adjust=f, means ignore missing distances and adjust cutoff as needed with the average neighbor method.  Adjust=t, will treat missing distances as 1.0. You can also set the value the missing distances should be set to, adjust=0.5 would give missing distances a value of 0.5.\n";
                helpString += "The penalty parameter is used to adjust the error rate.  The default is 0.10.\n";
                helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n";
                helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
@@ -184,7 +186,10 @@ MGClusterCommand::MGClusterCommand(string option) {
                        precisionLength = temp.length();
                        m->mothurConvert(temp, precision); 
                        
-                       temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
+            cutoffSet = false;
+                       temp = validParameter.validFile(parameters, "cutoff", false);
+            if (temp == "not found") { temp = "0.70"; }
+            else { cutoffSet = true;  }
                        m->mothurConvert(temp, cutoff); 
                        cutoff += (5 / (precision * 10.0));
                        
@@ -210,7 +215,12 @@ MGClusterCommand::MGClusterCommand(string option) {
                        hclusterWanted = m->isTrue(temp); 
                        
                        temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
-                       hard = m->isTrue(temp);            
+                       hard = m->isTrue(temp);
+            
+            temp = validParameter.validFile(parameters, "adjust", false);                              if (temp == "not found") { if (cutoffSet) { temp = "F"; }else { temp="T"; } }
+            if (m->isNumeric1(temp))    { m->mothurConvert(temp, adjust);   }
+            else if (m->isTrue(temp))   { adjust = 1.0;                     }
+            else                        { adjust = -1.0;                    }
                }
 
        }
@@ -302,9 +312,9 @@ int MGClusterCommand::execute(){
                        delete read;
                
                        //create cluster
-                       if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
-                       else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
-                       else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
+                       if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method, adjust); }
+                       else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method, adjust); }
+                       else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method, adjust); }
                        cluster->setMapWanted(true);
                        Seq2Bin = cluster->getSeqtoBin();
                        oldSeq2Bin = Seq2Bin;
index 008bd222a0a91869e14895738ea852b383c0395f..3865bb20811a0ac29e29e0fec02392268ea0a657 100644 (file)
@@ -56,9 +56,9 @@ private:
        string blastfile, method, namefile, countfile, overlapFile, distFile, outputDir;
        ofstream sabundFile, rabundFile, listFile;
        double cutoff;
-       float penalty;
+       float penalty, adjust;
        int precision, length, precisionLength;
-       bool abort, minWanted, hclusterWanted, merge, hard;
+       bool abort, minWanted, hclusterWanted, merge, hard, cutoffSet;
        
        void printData(ListVector*);
        ListVector* mergeOPFs(map<string, int>, float);
index dc026decc9db9916513fcbd1d41fe23298173717..a294a77c69a8cd5582a38cf7394c510a1e5ccccb 100644 (file)
@@ -180,18 +180,10 @@ int OtuHierarchyCommand::execute(){
                
                        if (m->control_pressed) {  return 0; }
                        
-                       string names = lists[0].get(i); 
-                       
-                       //parse bin
-                       while (names.find_first_of(',') != -1) { 
-                               string name = names.substr(0,names.find_first_of(','));
-                               names = names.substr(names.find_first_of(',')+1, names.length());
-                               littleBins[name] = i;  
-                       }
-                       
-                       //get last name
-                       littleBins[names] = i;
-               }
+                       string bin = lists[0].get(i);
+            vector<string> names; m->splitAtComma(bin, names);
+                       for (int j = 0; j < names.size(); j++) { littleBins[names[j]] = i; }
+        }
                
                ofstream out;
         map<string, string> variables; 
@@ -207,24 +199,19 @@ int OtuHierarchyCommand::execute(){
                
                        if (m->control_pressed) { outputTypes.clear(); out.close(); m->mothurRemove(outputFileName); return 0; }
                        
-                       string names = lists[1].get(i);
+                       string binnames = lists[1].get(i);
+            vector<string> names; m->splitAtComma(binnames, names);
+            
                        
                        //output column 1
-                       if (output == "name")   {   out << names << '\t';       }
-                       else                                    {       out << i << '\t';               }
+                       if (output == "name")   {   out << binnames << '\t';    }
+                       else                                    {       out << (i+1) << '\t';           }
                        
                        map<int, int> bins; //bin numbers in little that are in this bin in big
                        map<int, int>::iterator it;
                        
                        //parse bin
-                       while (names.find_first_of(',') != -1) { 
-                               string name = names.substr(0,names.find_first_of(','));
-                               names = names.substr(names.find_first_of(',')+1, names.length());
-                               bins[littleBins[name]] = littleBins[name];  
-                       }
-                       
-                       //get last name
-                       bins[littleBins[names]] = littleBins[names]; 
+                       for (int j = 0; j < names.size(); j++) { bins[littleBins[names[j]]] = littleBins[names[j]];   }
                        
                        string col2 = "";
                        for (it = bins.begin(); it != bins.end(); it++) {
index aeaddae1300207aff7ef713858e9ee760be52e12..18a55ac1b584dc0bce91c4f0f950d164e03fb983 100644 (file)
@@ -74,8 +74,8 @@ private:
 
        bool abort;
        string fastafile, namefile, groupfile, alignreport, outputDir, qualfile, taxonomy, countfile, contigsreport, summaryfile;
-       int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, processors, criteria, minOverlap, oStart, oEnd, mismatches, maxN, maxInsert;
-    float minSim, minScore;
+       int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, processors, minOverlap, oStart, oEnd, mismatches, maxN, maxInsert;
+    float minSim, minScore, criteria;
        vector<string> outputNames;
        vector<string> optimize;
        map<string, int> nameMap;
index 27bb8d95190fb66ea03510cf266d2d53824d988c..e6b037b6bcafafa6c7ccf5aafa9731de3d7ba864 100644 (file)
@@ -364,7 +364,7 @@ int SeqSummaryCommand::execute(){
                int size = startPosition.size();
                
                //find means
-               double meanStartPosition, meanEndPosition, meanSeqLength, meanAmbigBases, meanLongHomoPolymer;
+               unsigned long long meanStartPosition, meanEndPosition, meanSeqLength, meanAmbigBases, meanLongHomoPolymer;
                meanStartPosition = 0; meanEndPosition = 0; meanSeqLength = 0; meanAmbigBases = 0; meanLongHomoPolymer = 0;
                for (int i = 0; i < size; i++) {
                        meanStartPosition += startPosition[i];
@@ -374,8 +374,9 @@ int SeqSummaryCommand::execute(){
                        meanLongHomoPolymer += longHomoPolymer[i];
                }
                 
-               //this is an int divide so the remainder is lost
-               meanStartPosition /= (float) size; meanEndPosition /= (float) size; meanLongHomoPolymer /= (float) size; meanSeqLength /= (float) size; meanAmbigBases /= (float) size;
+        double meanstartPosition, meanendPosition, meanseqLength, meanambigBases, meanlongHomoPolymer;
+                
+               meanstartPosition /= (double) size; meanendPosition /= (double) size; meanlongHomoPolymer /= (double) size; meanseqLength /= (double) size; meanambigBases /= (double) size;
                                
                int ptile0_25   = int(size * 0.025);
                int ptile25             = int(size * 0.250);
@@ -399,7 +400,7 @@ int SeqSummaryCommand::execute(){
                m->mothurOut("75%-tile:\t" + toString(startPosition[ptile75]) + "\t" + toString(endPosition[ptile75]) + "\t" + toString(seqLength[ptile75]) + "\t" + toString(ambigBases[ptile75]) + "\t" + toString(longHomoPolymer[ptile75]) + "\t" + toString(ptile75+1)); m->mothurOutEndLine();
                m->mothurOut("97.5%-tile:\t" + toString(startPosition[ptile97_5]) + "\t" + toString(endPosition[ptile97_5]) + "\t" + toString(seqLength[ptile97_5]) + "\t" + toString(ambigBases[ptile97_5]) + "\t" + toString(longHomoPolymer[ptile97_5]) + "\t" + toString(ptile97_5+1)); m->mothurOutEndLine();
                m->mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100]) + "\t" + toString(ptile100+1)); m->mothurOutEndLine();
-               m->mothurOut("Mean:\t" + toString(meanStartPosition) + "\t" + toString(meanEndPosition) + "\t" + toString(meanSeqLength) + "\t" + toString(meanAmbigBases) + "\t" + toString(meanLongHomoPolymer)); m->mothurOutEndLine();
+               m->mothurOut("Mean:\t" + toString(meanstartPosition) + "\t" + toString(meanendPosition) + "\t" + toString(meanseqLength) + "\t" + toString(meanambigBases) + "\t" + toString(meanlongHomoPolymer)); m->mothurOutEndLine();
 
                if ((namefile == "") && (countfile == "")) {  m->mothurOut("# of Seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); }
                else { m->mothurOut("# of unique seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); m->mothurOut("total # of seqs:\t" + toString(startPosition.size())); m->mothurOutEndLine(); }
@@ -543,7 +544,7 @@ int SeqSummaryCommand::MPICreateSummary(int start, int num, vector<int>& startPo
                                }
                                
                                //for each sequence this sequence represents
-                               for (int i = 0; i < num; i++) {
+                               for (int j = 0; j < num; j++) {
                                        startPosition.push_back(current.getStartPos());
                                        endPosition.push_back(current.getEndPos());
                                        seqLength.push_back(current.getNumBases());
index 9bd437ad3cb7b2a75b69770769c8ddbc942f4a35..b25b9def28ffc13df0a2d0736fb69e427c9d0c62 100644 (file)
@@ -2727,7 +2727,8 @@ int ShhherCommand::cluster(string filename, string distFileName, string namesFil
         
                RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
                
-               Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest"); 
+        float adjust = -1.0;
+               Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest", adjust);
                string tag = cluster->getTag();
                
                double clusterCutoff = cutoff;
index 3af1ea0c346030eb6f8eda3527ffc7a882c1b950..3bed931d89ddf0e11f433916b5d4929694a3c19c 100644 (file)
@@ -5,8 +5,8 @@
 
 /***********************************************************************/
 
-SingleLinkage::SingleLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s) :
-Cluster(rav, lv, dm, c, s)
+SingleLinkage::SingleLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s, float a) :
+Cluster(rav, lv, dm, c, s, a)
 {}
 
 
index 03e9fa8e5b77905d4a4416a6c592d3b3756a02e4..417e24ae94e4242db9a56db14fd2987714ab1712 100644 (file)
@@ -89,6 +89,30 @@ void SparseDistanceMatrix::addCell(ull row, PDistCell cell){
                exit(1);
        }
 }
+/***********************************************************************/
+int SparseDistanceMatrix::addCellSorted(ull row, PDistCell cell){ //adds cell to seqVec[row] and a mirrored cell (row, cell.dist) to seqVec[cell.index], re-sorts both rows, and returns the position in seqVec[row] of the first entry whose index equals cell.index
+       try {
+               numNodes+=2; //two entries are appended below: the cell and its mirror
+               if(cell.dist < smallDist){ smallDist = cell.dist; } //keep smallDist at the smallest distance seen
+        
+        seqVec[row].push_back(cell);
+        PDistCell temp(row, cell.dist); //mirror entry: built from row and the same distance
+        seqVec[cell.index].push_back(temp);
+        
+        sortSeqVec(row); //re-sort only the two rows that were just appended to
+        sortSeqVec(cell.index);
+        
+        int location = -1; //position of the new cell in seqVec[row] after sorting; remains -1 if no entry matches
+        for (int i = 0; i < seqVec[row].size(); i++) {  if (seqVec[row][i].index == cell.index) { location = i; break; } } //linear scan that stops at the first entry whose index equals cell.index
+        
+        return location;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SparseDistanceMatrix", "addCellSorted");
+               exit(1); //a caught exception is reported through errorOut and then ends the program
+       }
+}
+
 /***********************************************************************/
 
 ull SparseDistanceMatrix::getSmallestCell(ull& row){
@@ -151,3 +175,18 @@ int SparseDistanceMatrix::sortSeqVec(){
 }
 /***********************************************************************/
 
+int SparseDistanceMatrix::sortSeqVec(int index){ //sorts the single row seqVec[index] with the compareIndexes comparator; always returns 0
+       try {
+        
+        //sorting saves time in getSmallestCell, by making it so you don't search the repeats
+        sort(seqVec[index].begin(), seqVec[index].end(), compareIndexes); //compareIndexes is defined outside this hunk; presumably orders cells by index - TODO confirm
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SparseDistanceMatrix", "sortSeqVec");
+               exit(1); //a caught exception is reported through errorOut and then ends the program
+       }
+}
+/***********************************************************************/
+
index f18fdd7db9a0351ba59aa38adabf3365fe0221c2..6c6bbe5a03d2c378c146539b3c20d5f3fd8755c3 100644 (file)
@@ -46,17 +46,21 @@ public:
     void resize(ull n) { seqVec.resize(n);  }
     void clear();
        void addCell(ull, PDistCell);
+    int addCellSorted(ull, PDistCell);
     vector<vector<PDistCell> > seqVec;
     
+    
 private:
        PDistCell smallCell;                            //The cell with the smallest distance
        int numNodes;
     
     bool sorted;
     int sortSeqVec();
+    int sortSeqVec(int);
        float smallDist, aboveCutoff;
     
        MothurOut* m;
+
 };
 
 /***********************************************************************/
index c1e4d51be52b95b7eec757930532a8d231f07ea5..b79986bd159866cd5b80aec76c570f7acac6e092 100644 (file)
@@ -10,8 +10,8 @@
 
 /***********************************************************************/
 
-WeightedLinkage::WeightedLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s) :
-       Cluster(rav, lv, dm, c, s)
+WeightedLinkage::WeightedLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s, float a) :
+       Cluster(rav, lv, dm, c, s, a)
 {
        saveRow = -1;
        saveCol = -1;