git.donarmstrong.com Git - mothur.git/commitdiff
Merge remote-tracking branch 'origin'
author Sarah Westcott <mothur.westcott@gmail.com>
Wed, 14 Aug 2013 16:20:47 +0000 (12:20 -0400)
committer Sarah Westcott <mothur.westcott@gmail.com>
Wed, 14 Aug 2013 16:20:47 +0000 (12:20 -0400)
25 files changed:
.gitignore
Mothur.xcodeproj/project.pbxproj
aligncommand.cpp
averagelinkage.cpp
classifyotucommand.cpp
cluster.cpp
cluster.hpp
clustercommand.cpp
clustercommand.h
clustersplitcommand.cpp
completelinkage.cpp
flowdata.cpp
flowdata.h
mgclustercommand.cpp
mgclustercommand.h
otuhierarchycommand.cpp
qualityscores.cpp
screenseqscommand.h
seqsummarycommand.cpp
shhhercommand.cpp
singlelinkage.cpp
sparsedistancematrix.cpp
sparsedistancematrix.h
trimflowscommand.cpp
weightedlinkage.cpp

index fb4ae5123e7cdc9f64ab449b496415f8edd27733..cc23b1fe84d2389cf242169eae47a6849af2d968 100644 (file)
@@ -1,9 +1,9 @@
 *.logfile
 *.o
 *.pbxproj
-*.xcuserdata
 *.zip
 .DS_Store
 .idea
 build
 xcuserdata
+*.xcuserdata
\ No newline at end of file
index 2fce558f6e412a62f2d2425990addeab0d525259..a17c97b9a7b3de851f05a07c91fe0fa28287f257 100644 (file)
                        outputFiles = (
                                "$(TARGET_BUILD_DIR)/$(INPUT_FILE_BASE).o",
                        );
-                       script = "/usr/local/bin/gfortran -g -m64 -c ${PROJECT_DIR}/${INPUT_FILE_NAME} -o ${TARGET_BUILD_DIR}/${INPUT_FILE_BASE}.o";
+                       script = "/usr/local/gfortran/bin/gfortran -g -m64 -c ${PROJECT_DIR}/${INPUT_FILE_NAME} -o ${TARGET_BUILD_DIR}/${INPUT_FILE_BASE}.o";
                };
 /* End PBXBuildRule section */
 
index f9c0436c62874864279e533a3258d72bf2f6cf2d..f757a7920bb337883a2a7773f35ed533d2186c80 100644 (file)
@@ -558,6 +558,7 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam
                        if (m->control_pressed) {  break; }
                        
                        Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
+            if (m->debug) { m->mothurOut("[DEBUG]: aligned = '" + candidateSeq->getAligned() + "'\n"); } //diagnostic only: keep per-sequence dumps out of normal output
                        report.setCandidate(candidateSeq);
 
                        int origNumBases = candidateSeq->getNumBases();
index e9ff3b312f04041e436686e682380dcbed019018..8627253777c1b09a1b4b16d1ff2ad4f90468248c 100644 (file)
@@ -11,8 +11,8 @@
 
 /***********************************************************************/
 
-AverageLinkage::AverageLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s) :
-Cluster(rav, lv, dm, c, s)
+AverageLinkage::AverageLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s, float a) :
+Cluster(rav, lv, dm, c, s, a)
 {
        saveRow = -1;
        saveCol = -1;
index 170c234ac245e6c32e3174ad80145fbcc359f2f2..76d7504aa04b0170629b0f022a882be94ceacfc0 100644 (file)
@@ -616,9 +616,10 @@ int ClassifyOtuCommand::process(ListVector* processList) {
                        //add this bins taxonomy to summary
                        if (basis == "sequence") {
                                for(int j = 0; j < names.size(); j++) {  
-                    int numReps = 1;
-                    if (countfile != "") {  numReps = ct->getNumSeqs(names[j]); }
-                    for(int k = 0; k < numReps; k++) {  taxaSum->addSeqToTree(names[j], noConfidenceConTax);  }
+                    //int numReps = 1;
+                    //if (countfile != "") {  numReps = ct->getNumSeqs(names[j]); }
+                    //for(int k = 0; k < numReps; k++) {  taxaSum->addSeqToTree(names[j], noConfidenceConTax);  }
+                    taxaSum->addSeqToTree(names[j], noConfidenceConTax);
                 }
                        }else { //otu
                 map<string, bool> containsGroup; 
index 0a70fbfee99957de6f08315562f9f0d9ac14fee0..6b69e4d5312fcce5db6c084fd42499b64724fb5e 100644 (file)
@@ -13,8 +13,8 @@
 
 /***********************************************************************/
 
-Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string f) :
-rabund(rav), list(lv), dMatrix(dm), method(f)
+Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string f, float cs) :
+rabund(rav), list(lv), dMatrix(dm), method(f), adjust(cs)
 {
        try {
         
@@ -85,7 +85,21 @@ void Cluster::update(double& cutOFF){
                                                        changed = updateDistance(dMatrix->seqVec[smallCol][j], dMatrix->seqVec[smallRow][i]);
                             dMatrix->updateCellCompliment(smallCol, j);
                                                        break;
-                                               }else if (dMatrix->seqVec[smallCol][j].index < search) { j+=nColCells; } //we don't have a distance for this cell 
+                                               }else if (dMatrix->seqVec[smallCol][j].index < search) { //we don't have a distance for this cell
+                            if (adjust != -1.0) { //adjust
+                                merged = true;
+                                PDistCell value(search, adjust); //create a distance for the missing value
+                                int location = dMatrix->addCellSorted(smallCol, value);
+                                changed = updateDistance(dMatrix->seqVec[smallCol][location], dMatrix->seqVec[smallRow][i]);
+                                dMatrix->updateCellCompliment(smallCol, location);
+                                nColCells++;
+                                foundCol.push_back(0); //add a new found column
+                                //adjust value
+                                for (int k = foundCol.size()-1; k > location; k--) { foundCol[k] = foundCol[k-1]; }
+                                foundCol[location] = 1;
+                            }
+                            j+=nColCells;
+                        } 
                                        }       
                                }
                                //if not merged it you need it for warning 
@@ -105,14 +119,20 @@ void Cluster::update(double& cutOFF){
                // Special handling for singlelinkage case, not sure whether this
                // could be avoided
                for (int i=nColCells-1;i>=0;i--) {
-                       if (foundCol[i] == 0) { 
-                               if (method == "average" || method == "weighted") {
-                                       if (dMatrix->seqVec[smallCol][i].index != smallRow) { //if you are not hte smallest distance 
-                                               if (cutOFF > dMatrix->seqVec[smallCol][i].dist) {  
-                                                       cutOFF = dMatrix->seqVec[smallCol][i].dist;  
-                                               }
-                                       }
-                               }
+                       if (foundCol[i] == 0) {
+                if (adjust != -1.0) { //adjust
+                    PDistCell value(smallCol, adjust); //create a distance for the missing value
+                    changed = updateDistance(dMatrix->seqVec[smallCol][i], value);
+                    dMatrix->updateCellCompliment(smallCol, i);
+                }else {
+                    if (method == "average" || method == "weighted") {
+                        if (dMatrix->seqVec[smallCol][i].index != smallRow) { //if you are not hte smallest distance 
+                            if (cutOFF > dMatrix->seqVec[smallCol][i].dist) {  
+                                cutOFF = dMatrix->seqVec[smallCol][i].dist;  
+                            }
+                        }
+                    }
+                }
                 dMatrix->rmCell(smallCol, i);
                        }
                }
index 26a01b9389778fea2e2b1067a1738c75027ad7f2..23a3d97f26db6087adcf1dee3c015e581e06321c 100644 (file)
@@ -13,7 +13,7 @@ class ListVector;
 class Cluster {
        
 public:
-       Cluster(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string);
+       Cluster(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string, float);
     virtual ~Cluster() {}
     virtual void update(double&);                              
        virtual string getTag() = 0;
@@ -33,7 +33,7 @@ protected:
        
        ull smallRow;
        ull smallCol;
-       float smallDist;
+       float smallDist, adjust;
        bool mapWanted;
        float cutoff;
        map<string, int> seq2Bin;
@@ -48,7 +48,7 @@ protected:
 
 class CompleteLinkage : public Cluster {
 public:
-       CompleteLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string);
+       CompleteLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string, float);
        bool updateDistance(PDistCell& colCell, PDistCell& rowCell);
        string getTag();
        
@@ -60,7 +60,7 @@ private:
 
 class SingleLinkage : public Cluster {
 public:
-       SingleLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string);
+       SingleLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string, float);
     void update(double&);
        bool updateDistance(PDistCell& colCell, PDistCell& rowCell);
        string getTag();
@@ -73,7 +73,7 @@ private:
 
 class AverageLinkage : public Cluster {
 public:
-       AverageLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string);
+       AverageLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string, float);
        bool updateDistance(PDistCell& colCell, PDistCell& rowCell);
        string getTag();
        
@@ -90,7 +90,7 @@ private:
 
 class WeightedLinkage : public Cluster {
 public:
-       WeightedLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string);
+       WeightedLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string, float);
        bool updateDistance(PDistCell& colCell, PDistCell& rowCell);
        string getTag();
        
index 2ac2eaa6bd2466df296e407e7e67f9c8a095edc3..94129654e1d2b11275e82d3a5da0382b1448efa9 100644 (file)
@@ -29,6 +29,7 @@ vector<string> ClusterCommand::setParameters(){
                CommandParameter psim("sim", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psim);
                CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+        //CommandParameter padjust("adjust", "String", "", "F", "", "", "","",false,false); parameters.push_back(padjust);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
@@ -45,7 +46,8 @@ string ClusterCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The cluster command parameter options are phylip, column, name, count, method, cuttoff, hard, precision, sim, showabund and timing. Phylip or column and name are required, unless you have a valid current file.\n";
-               helpString += "The cluster command should be in the following format: \n";
+               //helpString += "The adjust parameter is used to handle missing distances.  If you set a cutoff, adjust=f by default.  If not, adjust=t by default. Adjust=f, means ignore missing distances and adjust cutoff as needed with the average neighbor method.  Adjust=t, will treat missing distances as 1.0. You can also set the value the missing distances should be set to, adjust=0.5 would give missing distances a value of 0.5.\n";
+        helpString += "The cluster command should be in the following format: \n";
                helpString += "cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n";
                helpString += "The acceptable cluster methods are furthest, nearest, average and weighted.  If no method is provided then average is assumed.\n";       
                return helpString;
@@ -229,10 +231,18 @@ ClusterCommand::ClusterCommand(string option)  {
                        temp = validParameter.validFile(parameters, "sim", false);                              if (temp == "not found") { temp = "F"; }
                        sim = m->isTrue(temp); 
                        
+            //bool cutoffSet = false;
                        temp = validParameter.validFile(parameters, "cutoff", false);
                        if (temp == "not found") { temp = "10"; }
+            //else { cutoffSet = true; }
                        m->mothurConvert(temp, cutoff); 
-                       cutoff += (5 / (precision * 10.0));  
+                       cutoff += (5 / (precision * 10.0));
+            
+            //temp = validParameter.validFile(parameters, "adjust", false);                            if (temp == "not found") { temp = "F"; }
+            //if (m->isNumeric1(temp))    { m->mothurConvert(temp, adjust);   }
+            //else if (m->isTrue(temp))   { adjust = 1.0;                     }
+            //else                        { adjust = -1.0;                    }
+            adjust=-1.0;
                        
                        method = validParameter.validFile(parameters, "method", false);
                        if (method == "not found") { method = "average"; }
@@ -325,10 +335,10 @@ int ClusterCommand::execute(){
                }
                
                //create cluster
-               if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
-               else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
-               else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
-               else if(method == "weighted"){  cluster = new WeightedLinkage(rabund, list, matrix, cutoff, method);    }
+               if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method, adjust); }
+               else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method, adjust); }
+               else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method, adjust);     }
+               else if(method == "weighted"){  cluster = new WeightedLinkage(rabund, list, matrix, cutoff, method, adjust);    }
                tag = cluster->getTag();
                
                if (outputDir == "") { outputDir += m->hasPath(distfile); }
index 96b7c08861f216862de0c1e600d07fed463eb56a..5786da220046713d4eb18a26f2aba4529011093c 100644 (file)
@@ -56,6 +56,7 @@ private:
 
        string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, format, distfile, countfile;
        double cutoff;
+    float adjust;
        string showabund, timing;
        int precision, length;
        ofstream sabundFile, rabundFile, listFile;
index 270ea6208d8ead0683d8cf46fb8d7a417469c243..b02bd20063e9f70bb422fa541403013d6a1976ab 100644 (file)
@@ -1330,9 +1330,10 @@ string ClusterSplitCommand::clusterFile(string thisDistFile, string thisNamefile
         m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
                
         //create cluster
-        if (method == "furthest")      {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
-        else if(method == "nearest"){  cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
-        else if(method == "average"){  cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
+        float adjust = -1.0;
+        if (method == "furthest")      {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method, adjust); }
+        else if(method == "nearest"){  cluster = new SingleLinkage(rabund, list, matrix, cutoff, method, adjust); }
+        else if(method == "average"){  cluster = new AverageLinkage(rabund, list, matrix, cutoff, method, adjust);     }
         tag = cluster->getTag();
                
         if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
index 06ed2db6495a0897c74c2b278aac642a15ea1c78..0a3c7b3e42f68347d0315299d5ed050b89ebb796 100644 (file)
@@ -3,8 +3,8 @@
 
 /***********************************************************************/
 
-CompleteLinkage::CompleteLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s) :
-       Cluster(rav, lv, dm, c, s)
+CompleteLinkage::CompleteLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s, float a) :
+       Cluster(rav, lv, dm, c, s, a)
 {}
 
 /***********************************************************************/
index 66261e33b922f17be93e3f929582ca9ea7d9208f..7d61f8c265e060f2dc0002c94f69edfd054f89ae 100644 (file)
@@ -190,6 +190,27 @@ void FlowData::capFlows(int mF){
 
 //**********************************************************************************************************************
 
+//Returns true when no flow in [0, endFlow) exceeds maxHomoP + 0.49, false otherwise.
+bool FlowData::hasGoodHomoP(){
+       try{
+        //threshold: any flow value strictly above this fails the check
+        const float maxIntensity = (float) maxHomoP + 0.49;
+
+        for(int i=0;i<endFlow;i++){
+            if(flowData[i] > maxIntensity){
+                return false;
+            }
+        }
+               return true;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "FlowData", "hasGoodHomoP"); //report this function, not hasMinFlows
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
 bool FlowData::hasMinFlows(int minFlows){
        
        try{
index c7fd08a0a0100841e2805062ae5f44f7e5821d6d..cfe349c6e53a14180d521ae6401ed07381c0cd9f 100644 (file)
@@ -24,6 +24,8 @@ public:
        string getName();
        void capFlows(int);
        bool hasMinFlows(int);
+    bool hasGoodHomoP();
+
        Sequence getSequence();
 
        void printFlows(ofstream&);
index e3287f7f9bb19a4079c28e92fe57cf99e06216d5..97f0afdb366949d39e5af37a96c0211d9df0ca17 100644 (file)
@@ -23,6 +23,7 @@ vector<string> MGClusterCommand::setParameters(){
                CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
                CommandParameter pmin("min", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmin);
                CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmerge);
+        CommandParameter padjust("adjust", "String", "", "F", "", "", "","",false,false); parameters.push_back(padjust);
                CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(phcluster);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
@@ -40,7 +41,7 @@ vector<string> MGClusterCommand::setParameters(){
 string MGClusterCommand::getHelpString(){      
        try {
                string helpString = "";
-               helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard,  method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
+               helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard,  method, merge, min, length, penalty, adjust and hcluster. The blast parameter is required.\n";
                helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
                helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
                helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
@@ -48,6 +49,7 @@ string MGClusterCommand::getHelpString(){
                helpString += "The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then average is assumed.\n";       
                helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
                helpString += "The length parameter is used to specify the minimum overlap required.  The default is 5.\n";
+        helpString += "The adjust parameter is used to handle missing distances.  If you set a cutoff, adjust=f by default.  If not, adjust=t by default. Adjust=f, means ignore missing distances and adjust cutoff as needed with the average neighbor method.  Adjust=t, will treat missing distances as 1.0. You can also set the value the missing distances should be set to, adjust=0.5 would give missing distances a value of 0.5.\n";
                helpString += "The penalty parameter is used to adjust the error rate.  The default is 0.10.\n";
                helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n";
                helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
@@ -184,7 +186,10 @@ MGClusterCommand::MGClusterCommand(string option) {
                        precisionLength = temp.length();
                        m->mothurConvert(temp, precision); 
                        
-                       temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
+            cutoffSet = false;
+                       temp = validParameter.validFile(parameters, "cutoff", false);
+            if (temp == "not found") { temp = "0.70"; }
+            else { cutoffSet = true;  }
                        m->mothurConvert(temp, cutoff); 
                        cutoff += (5 / (precision * 10.0));
                        
@@ -210,7 +215,12 @@ MGClusterCommand::MGClusterCommand(string option) {
                        hclusterWanted = m->isTrue(temp); 
                        
                        temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
-                       hard = m->isTrue(temp);            
+                       hard = m->isTrue(temp);
+            
+            temp = validParameter.validFile(parameters, "adjust", false);                              if (temp == "not found") { if (cutoffSet) { temp = "F"; }else { temp="T"; } }
+            if (m->isNumeric1(temp))    { m->mothurConvert(temp, adjust);   }
+            else if (m->isTrue(temp))   { adjust = 1.0;                     }
+            else                        { adjust = -1.0;                    }
                }
 
        }
@@ -302,9 +312,9 @@ int MGClusterCommand::execute(){
                        delete read;
                
                        //create cluster
-                       if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
-                       else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
-                       else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
+                       if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method, adjust); }
+                       else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method, adjust); }
+                       else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method, adjust); }
                        cluster->setMapWanted(true);
                        Seq2Bin = cluster->getSeqtoBin();
                        oldSeq2Bin = Seq2Bin;
index 008bd222a0a91869e14895738ea852b383c0395f..3865bb20811a0ac29e29e0fec02392268ea0a657 100644 (file)
@@ -56,9 +56,9 @@ private:
        string blastfile, method, namefile, countfile, overlapFile, distFile, outputDir;
        ofstream sabundFile, rabundFile, listFile;
        double cutoff;
-       float penalty;
+       float penalty, adjust;
        int precision, length, precisionLength;
-       bool abort, minWanted, hclusterWanted, merge, hard;
+       bool abort, minWanted, hclusterWanted, merge, hard, cutoffSet;
        
        void printData(ListVector*);
        ListVector* mergeOPFs(map<string, int>, float);
index dc026decc9db9916513fcbd1d41fe23298173717..a294a77c69a8cd5582a38cf7394c510a1e5ccccb 100644 (file)
@@ -180,18 +180,10 @@ int OtuHierarchyCommand::execute(){
                
                        if (m->control_pressed) {  return 0; }
                        
-                       string names = lists[0].get(i); 
-                       
-                       //parse bin
-                       while (names.find_first_of(',') != -1) { 
-                               string name = names.substr(0,names.find_first_of(','));
-                               names = names.substr(names.find_first_of(',')+1, names.length());
-                               littleBins[name] = i;  
-                       }
-                       
-                       //get last name
-                       littleBins[names] = i;
-               }
+                       string bin = lists[0].get(i);
+            vector<string> names; m->splitAtComma(bin, names);
+                       for (int j = 0; j < names.size(); j++) { littleBins[names[j]] = i; }
+        }
                
                ofstream out;
         map<string, string> variables; 
@@ -207,24 +199,19 @@ int OtuHierarchyCommand::execute(){
                
                        if (m->control_pressed) { outputTypes.clear(); out.close(); m->mothurRemove(outputFileName); return 0; }
                        
-                       string names = lists[1].get(i);
+                       string binnames = lists[1].get(i);
+            vector<string> names; m->splitAtComma(binnames, names);
+            
                        
                        //output column 1
-                       if (output == "name")   {   out << names << '\t';       }
-                       else                                    {       out << i << '\t';               }
+                       if (output == "name")   {   out << binnames << '\t';    }
+                       else                                    {       out << (i+1) << '\t';           }
                        
                        map<int, int> bins; //bin numbers in little that are in this bin in big
                        map<int, int>::iterator it;
                        
                        //parse bin
-                       while (names.find_first_of(',') != -1) { 
-                               string name = names.substr(0,names.find_first_of(','));
-                               names = names.substr(names.find_first_of(',')+1, names.length());
-                               bins[littleBins[name]] = littleBins[name];  
-                       }
-                       
-                       //get last name
-                       bins[littleBins[names]] = littleBins[names]; 
+                       for (int j = 0; j < names.size(); j++) { bins[littleBins[names[j]]] = littleBins[names[j]];   }
                        
                        string col2 = "";
                        for (it = bins.begin(); it != bins.end(); it++) {
index 26492245e2b9805144125052d57045a834bc3f95..0408b83ebaf0d79f31e5290278fa29e567c1cb87 100644 (file)
@@ -32,22 +32,22 @@ QualityScores::QualityScores(ifstream& qFile){
                m = MothurOut::getInstance();
 
                int score;
-               seqName = getSequenceName(qFile);
+               seqName = getSequenceName(qFile); m->gobble(qFile);
                
         if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "'\n.");  }
         
                if (!m->control_pressed) {
-            string qScoreString = m->getline(qFile);
+            string qScoreString = m->getline(qFile); m->gobble(qFile);
             
             if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + qScoreString + "'\n.");  }
             
             while(qFile.peek() != '>' && qFile.peek() != EOF){
                 if (m->control_pressed) { break; }
-                string temp = m->getline(qFile);
+                string temp = m->getline(qFile); m->gobble(qFile);
                 if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + temp + "'\n.");  }
                 qScoreString +=  ' ' + temp;
             }
-            //cout << "done reading " << endl; 
+            //cout << "done reading " << endl;
             istringstream qScoreStringStream(qScoreString);
             int count = 0;
             while(!qScoreStringStream.eof()){
@@ -68,7 +68,7 @@ QualityScores::QualityScores(ifstream& qFile){
         }
                
                seqLength = qScores.size();
-               //cout << "seqlength = " << seqLength << '\t' << count << endl;
+               //cout << "seqlength = " << seqLength  << endl;
                
        }
        catch(exception& e) {
index aeaddae1300207aff7ef713858e9ee760be52e12..18a55ac1b584dc0bce91c4f0f950d164e03fb983 100644 (file)
@@ -74,8 +74,8 @@ private:
 
        bool abort;
        string fastafile, namefile, groupfile, alignreport, outputDir, qualfile, taxonomy, countfile, contigsreport, summaryfile;
-       int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, processors, criteria, minOverlap, oStart, oEnd, mismatches, maxN, maxInsert;
-    float minSim, minScore;
+       int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, processors, minOverlap, oStart, oEnd, mismatches, maxN, maxInsert;
+    float minSim, minScore, criteria;
        vector<string> outputNames;
        vector<string> optimize;
        map<string, int> nameMap;
index 27bb8d95190fb66ea03510cf266d2d53824d988c..e6b037b6bcafafa6c7ccf5aafa9731de3d7ba864 100644 (file)
@@ -364,7 +364,7 @@ int SeqSummaryCommand::execute(){
                int size = startPosition.size();
                
                //find means
-               double meanStartPosition, meanEndPosition, meanSeqLength, meanAmbigBases, meanLongHomoPolymer;
+               unsigned long long meanStartPosition, meanEndPosition, meanSeqLength, meanAmbigBases, meanLongHomoPolymer;
                meanStartPosition = 0; meanEndPosition = 0; meanSeqLength = 0; meanAmbigBases = 0; meanLongHomoPolymer = 0;
                for (int i = 0; i < size; i++) {
                        meanStartPosition += startPosition[i];
@@ -374,8 +374,9 @@ int SeqSummaryCommand::execute(){
                        meanLongHomoPolymer += longHomoPolymer[i];
                }
                 
-               //this is an int divide so the remainder is lost
-               meanStartPosition /= (float) size; meanEndPosition /= (float) size; meanLongHomoPolymer /= (float) size; meanSeqLength /= (float) size; meanAmbigBases /= (float) size;
+        //seed the floating-point means from the integer sums accumulated above; dividing uninitialized doubles yields garbage
+        double meanstartPosition = meanStartPosition, meanendPosition = meanEndPosition, meanseqLength = meanSeqLength, meanambigBases = meanAmbigBases, meanlongHomoPolymer = meanLongHomoPolymer;
+               meanstartPosition /= (double) size; meanendPosition /= (double) size; meanlongHomoPolymer /= (double) size; meanseqLength /= (double) size; meanambigBases /= (double) size;
                                
                int ptile0_25   = int(size * 0.025);
                int ptile25             = int(size * 0.250);
@@ -399,7 +400,7 @@ int SeqSummaryCommand::execute(){
                m->mothurOut("75%-tile:\t" + toString(startPosition[ptile75]) + "\t" + toString(endPosition[ptile75]) + "\t" + toString(seqLength[ptile75]) + "\t" + toString(ambigBases[ptile75]) + "\t" + toString(longHomoPolymer[ptile75]) + "\t" + toString(ptile75+1)); m->mothurOutEndLine();
                m->mothurOut("97.5%-tile:\t" + toString(startPosition[ptile97_5]) + "\t" + toString(endPosition[ptile97_5]) + "\t" + toString(seqLength[ptile97_5]) + "\t" + toString(ambigBases[ptile97_5]) + "\t" + toString(longHomoPolymer[ptile97_5]) + "\t" + toString(ptile97_5+1)); m->mothurOutEndLine();
                m->mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100]) + "\t" + toString(ptile100+1)); m->mothurOutEndLine();
-               m->mothurOut("Mean:\t" + toString(meanStartPosition) + "\t" + toString(meanEndPosition) + "\t" + toString(meanSeqLength) + "\t" + toString(meanAmbigBases) + "\t" + toString(meanLongHomoPolymer)); m->mothurOutEndLine();
+               m->mothurOut("Mean:\t" + toString(meanstartPosition) + "\t" + toString(meanendPosition) + "\t" + toString(meanseqLength) + "\t" + toString(meanambigBases) + "\t" + toString(meanlongHomoPolymer)); m->mothurOutEndLine();
 
                if ((namefile == "") && (countfile == "")) {  m->mothurOut("# of Seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); }
                else { m->mothurOut("# of unique seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); m->mothurOut("total # of seqs:\t" + toString(startPosition.size())); m->mothurOutEndLine(); }
@@ -543,7 +544,7 @@ int SeqSummaryCommand::MPICreateSummary(int start, int num, vector<int>& startPo
                                }
                                
                                //for each sequence this sequence represents
-                               for (int i = 0; i < num; i++) {
+                               for (int j = 0; j < num; j++) {
                                        startPosition.push_back(current.getStartPos());
                                        endPosition.push_back(current.getEndPos());
                                        seqLength.push_back(current.getNumBases());
index a20679a1b352f0fc95f2329f0e536c54a029cced..4a6e5ff2375ce40560db1aa706edbdacefac0e0d 100644 (file)
@@ -2737,7 +2737,8 @@ int ShhherCommand::cluster(string filename, string distFileName, string namesFil
         
                RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
                
-               Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest"); 
+        float adjust = -1.0;
+               Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest", adjust);
                string tag = cluster->getTag();
                
                double clusterCutoff = cutoff;
index 3af1ea0c346030eb6f8eda3527ffc7a882c1b950..3bed931d89ddf0e11f433916b5d4929694a3c19c 100644 (file)
@@ -5,8 +5,8 @@
 
 /***********************************************************************/
 
-SingleLinkage::SingleLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s) :
-Cluster(rav, lv, dm, c, s)
+SingleLinkage::SingleLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s, float a) : // Constructor with an empty body: it only forwards its arguments to the Cluster base class.
+Cluster(rav, lv, dm, c, s, a) // All six arguments are passed through unchanged; 'a' is the parameter this change adds (presumably the "adjust" value -- TODO confirm against Cluster's declaration).
 {}
 
 
index 03e9fa8e5b77905d4a4416a6c592d3b3756a02e4..417e24ae94e4242db9a56db14fd2987714ab1712 100644 (file)
@@ -89,6 +89,30 @@ void SparseDistanceMatrix::addCell(ull row, PDistCell cell){
                exit(1);
        }
 }
+/***********************************************************************/
+int SparseDistanceMatrix::addCellSorted(ull row, PDistCell cell){ // Inserts 'cell' into row 'row' plus a mirrored cell into row 'cell.index', re-sorts both rows, and returns the cell's position in seqVec[row] (-1 if not found).
+       try {
+               numNodes+=2; // two entries are stored below: the cell itself and its mirror
+               if(cell.dist < smallDist){ smallDist = cell.dist; } // keep smallDist at the smallest distance added so far
+        // NOTE(review): no bounds check on 'row' or 'cell.index' is visible here; assumes both are valid indices into seqVec -- confirm callers size the matrix first.
+        seqVec[row].push_back(cell);
+        PDistCell temp(row, cell.dist); // mirrored entry: same distance, pointing back at 'row'
+        seqVec[cell.index].push_back(temp);
+        // Re-sort only the two rows that changed, using the single-row sortSeqVec overload.
+        sortSeqVec(row);
+        sortSeqVec(cell.index);
+        // NOTE(review): each call re-sorts two whole rows and then scans one linearly; 'i' is an int compared against an unsigned size().
+        int location = -1; // position of the new cell in seqVec[row] after sorting; stays -1 if no cell with a matching index is found
+        for (int i = 0; i < seqVec[row].size(); i++) {  if (seqVec[row][i].index == cell.index) { location = i; break; } } // stops at the first cell whose index equals cell.index
+        
+        return location;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SparseDistanceMatrix", "addCellSorted"); // report the exception through MothurOut, then terminate the process
+               exit(1);
+       }
+}
+
 /***********************************************************************/
 
 ull SparseDistanceMatrix::getSmallestCell(ull& row){
@@ -151,3 +175,18 @@ int SparseDistanceMatrix::sortSeqVec(){
 }
 /***********************************************************************/
 
+int SparseDistanceMatrix::sortSeqVec(int index){ // Sorts the single row seqVec[index] with the compareIndexes comparator; returns 0 on completion.
+       try {
+        // 'index' is used directly as a subscript into seqVec; no range check is performed here.
+        //saves time in getSmallestCell, by making it so you don't search the repeats
+        sort(seqVec[index].begin(), seqVec[index].end(), compareIndexes); // compareIndexes is defined outside this hunk; the ordering it imposes is not visible here
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SparseDistanceMatrix", "sortSeqVec"); // report the exception through MothurOut, then terminate the process
+               exit(1);
+       }
+}
+/***********************************************************************/
+
index f18fdd7db9a0351ba59aa38adabf3365fe0221c2..6c6bbe5a03d2c378c146539b3c20d5f3fd8755c3 100644 (file)
@@ -46,17 +46,21 @@ public:
     void resize(ull n) { seqVec.resize(n);  }
     void clear();
        void addCell(ull, PDistCell);
+    int addCellSorted(ull, PDistCell);
     vector<vector<PDistCell> > seqVec;
     
+    
 private:
        PDistCell smallCell;                            //The cell with the smallest distance
        int numNodes;
     
     bool sorted;
     int sortSeqVec();
+    int sortSeqVec(int);
        float smallDist, aboveCutoff;
     
        MothurOut* m;
+
 };
 
 /***********************************************************************/
index aebb16b42a6455693b7c491f83b1740c58acd0e0..2a163516952c27f8a3dc7411536023f69e1f5b08 100644 (file)
@@ -435,7 +435,11 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                                success = 0;
                                trashCode += 'l';
                        }
-                       
+            if(!flowData.hasGoodHomoP()){      //screen to see if sequence meets the maximum homopolymer limit
+                               success = 0;
+                               trashCode += 'h';
+                       }
+
                        int primerIndex = 0;
                        int barcodeIndex = 0;
                        
index c1e4d51be52b95b7eec757930532a8d231f07ea5..b79986bd159866cd5b80aec76c570f7acac6e092 100644 (file)
@@ -10,8 +10,8 @@
 
 /***********************************************************************/
 
-WeightedLinkage::WeightedLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s) :
-       Cluster(rav, lv, dm, c, s)
+WeightedLinkage::WeightedLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s, float a) :
+       Cluster(rav, lv, dm, c, s, a)
 {
        saveRow = -1;
        saveCol = -1;